diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 00000000..f8a20eda --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,8 @@ +# Contributing + +## Build documentation locally + +Using tox: +```shell +$ tox -e docs +``` diff --git a/.github/SECURITY.md b/.github/SECURITY.md new file mode 100644 index 00000000..e4f0e0b3 --- /dev/null +++ b/.github/SECURITY.md @@ -0,0 +1,13 @@ +# Security Policy + +## Supported Versions + +Security updates are applied only to the latest release. + +## Reporting a Vulnerability + +If you have discovered a security vulnerability in this project, please report it privately. **Do not disclose it as a public issue.** This gives us time to work with you to fix the issue before public exposure, reducing the chance that the exploit will be used before a patch is released. + +Please disclose it at [security advisory](https://github.com/PythonCharmers/python-future/security/advisories/new). + +This project is maintained by a team of volunteers on a reasonable-effort basis. As such, please give us at least 90 days to work on a fix before public exposure. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..bcefff65 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,46 @@ +name: CI + +on: + pull_request: + push: + +concurrency: + group: ${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + test: + strategy: + fail-fast: false + matrix: + versions: + # - python: "2.6" + - python: "2.7" + - python: "3.3" + - python: "3.4" + - python: "3.5" + - python: "3.6" + - python: "3.7" + - python: "3.8" + - python: "3.9" + + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - if: ${{ matrix.versions.python != '2.6' }} + run: | + docker build \ + . 
\ + --build-arg PYTHON_VERSION=${{ matrix.versions.python }} \ + -t jmadler/python-future-builder:${{ matrix.versions.python }} + - if: ${{ matrix.versions.python == '2.6' }} + run: | + docker build \ + . \ + -f 2.6.Dockerfile \ + -t jmadler/python-future-builder:${{ matrix.versions.python }} + - run: | + docker run \ + -e PYTHON_VERSION=${{ matrix.versions.python }} \ + jmadler/python-future-builder:${{ matrix.versions.python }} \ + /root/python-future/test.sh diff --git a/.gitignore b/.gitignore index 52146838..1b8eaeb5 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,12 @@ develop-eggs .installed.cfg lib lib64 +MANIFEST +MANIFEST.in + +# Backup files +*.bak +*.backup # Installer logs pip-log.txt @@ -38,3 +44,8 @@ nosetests.xml .project .pydevproject +# PyCharm / IntelliJ +.idea + +# Generated test file +mytempfile.py diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml new file mode 100644 index 00000000..dd8d0d65 --- /dev/null +++ b/.pre-commit-hooks.yaml @@ -0,0 +1,15 @@ +- id: futurize + name: futurize + description: Futurize your Py2 code to ensure it is runnable on Py3. + language: python + types: [python] + entry: futurize -w -n --no-diffs + args: [--stage1] + +- id: pasteurize + name: pasteurize + description: Pasteurize your Py3 code to ensure it is runnable on Py2. + language: python + language_version: python3 + types: [python] + entry: pasteurize -w -n --no-diffs diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 7b0fa4ce..00000000 --- a/.travis.yml +++ /dev/null @@ -1,14 +0,0 @@ -language: python -python: - - "3.3" - - "2.7" - - "2.6" -# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors -# These packages only exist on Ubuntu 13.04 and newer: -# before_install: -# - sudo apt-get install -qq libpython2.7-testsuite libpython3.3-testsuite -# No dependencies currently unless using Python 2.6. 
-install: - - if [[ $TRAVIS_PYTHON_VERSION == 2.6* ]]; then pip install -r requirements_py26.txt --use-mirrors; fi -# command to run tests, e.g. python setup.py test -script: python setup.py test diff --git a/2.6.Dockerfile b/2.6.Dockerfile new file mode 100644 index 00000000..efaf3809 --- /dev/null +++ b/2.6.Dockerfile @@ -0,0 +1,26 @@ +FROM mrupgrade/deadsnakes:2.6 + +RUN mkdir -p ~/.pip/ && echo '[global] \n\ +trusted-host = pypi.python.org\n\ + pypi.org\n\ + files.pythonhosted.org\n\ +' >> ~/.pip/pip.conf + +RUN apt-get update && \ + apt-get install -y curl + +RUN mkdir -p /root/pip && \ + cd /root/pip && \ + curl -O https://files.pythonhosted.org/packages/8a/e9/8468cd68b582b06ef554be0b96b59f59779627131aad48f8a5bce4b13450/wheel-0.29.0-py2.py3-none-any.whl && \ + curl -O https://files.pythonhosted.org/packages/31/77/3781f65cafe55480b56914def99022a5d2965a4bb269655c89ef2f1de3cd/importlib-1.0.4.zip && \ + curl -O https://files.pythonhosted.org/packages/ef/41/d8a61f1b2ba308e96b36106e95024977e30129355fd12087f23e4b9852a1/pytest-3.2.5-py2.py3-none-any.whl && \ + curl -O https://files.pythonhosted.org/packages/f2/94/3af39d34be01a24a6e65433d19e107099374224905f1e0cc6bbe1fd22a2f/argparse-1.4.0-py2.py3-none-any.whl && \ + curl -O https://files.pythonhosted.org/packages/72/20/7f0f433060a962200b7272b8c12ba90ef5b903e218174301d0abfd523813/unittest2-1.1.0-py2.py3-none-any.whl && \ + curl -O https://files.pythonhosted.org/packages/53/67/9620edf7803ab867b175e4fd23c7b8bd8eba11cb761514dcd2e726ef07da/py-1.4.34-py2.py3-none-any.whl && \ + curl -O https://files.pythonhosted.org/packages/53/25/ef88e8e45db141faa9598fbf7ad0062df8f50f881a36ed6a0073e1572126/ordereddict-1.1.tar.gz && \ + curl -O https://files.pythonhosted.org/packages/17/0a/6ac05a3723017a967193456a2efa0aa9ac4b51456891af1e2353bb9de21e/traceback2-1.4.0-py2.py3-none-any.whl && \ + curl -O https://files.pythonhosted.org/packages/65/26/32b8464df2a97e6dd1b656ed26b2c194606c16fe163c695a992b36c11cdf/six-1.13.0-py2.py3-none-any.whl && 
\ + curl -O https://files.pythonhosted.org/packages/c7/a3/c5da2a44c85bfbb6eebcfc1dde24933f8704441b98fdde6528f4831757a6/linecache2-1.0.0-py2.py3-none-any.whl + +WORKDIR /root/python-future +ADD . /root/python-future \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..c859757f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,7 @@ +ARG PYTHON_VERSION +FROM python:${PYTHON_VERSION}-slim + +ENV LC_ALL=C.UTF-8 + +WORKDIR /root/python-future +ADD . /root/python-future diff --git a/LICENSE.txt b/LICENSE.txt index 65c70446..275cafd3 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2013-2014 Python Charmers Pty Ltd, Australia +Copyright (c) 2013-2024 Python Charmers, Australia Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/MANIFEST.in b/MANIFEST.in index d050d284..d0e9f3d1 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,9 +1,27 @@ -include README.rst -include futurize.py -include discover_tests.py -include LICENSE.txt -include ChangeLog.txt -include docs/*.py -include docs/*.txt -include docs/*.rst - +include *.sh +include *.txt +include *.rst +include *.py +include .travis.yml +include pytest.ini +recursive-include docs LICENSE +recursive-include docs README +recursive-include docs *.conf +recursive-include docs *.css_t +recursive-include docs *.html +recursive-include docs *.ico +recursive-include docs *.inc +recursive-include docs *.ipynb +recursive-include docs *.png +recursive-include docs *.py +recursive-include docs *.rst +recursive-include docs *.sh +recursive-include docs *.tiff +recursive-include docs *.txt +recursive-include docs Makefile +recursive-include src *.py +recursive-include src *.pem +recursive-include tests *.au +recursive-include tests *.gif +recursive-include tests *.py +recursive-include tests *.txt diff --git a/README.rst b/README.rst index 5d679deb..a3aceb7d 
100644 --- a/README.rst +++ b/README.rst @@ -1,56 +1,91 @@ -future: an easier, safer, cleaner upgrade path to Python 3 -========================================================== +.. _overview: +Overview: Easy, clean, reliable Python 2/3 compatibility +======================================================== -Overview -======== +.. image:: https://github.com/PythonCharmers/python-future/actions/workflows/ci.yml/badge.svg?branch=master + :target: https://github.com/PythonCharmers/python-future/actions/workflows/ci.yml?query=branch%3Amaster -``future`` is the missing compatibility layer between Python 3 and Python -2. It allows you to maintain a single, clean Python 3.x-compatible -codebase with minimal cruft and run it easily on Python 2 without further -modification. +``python-future`` is the missing compatibility layer between Python 2 and +Python 3. It allows you to use a single, clean Python 3.x-compatible +codebase to support both Python 2 and Python 3 with minimal overhead. -``future`` comes with ``futurize``, a script that helps you to transition -to supporting both Python 2 and 3 in a single codebase, module by module. +It provides ``future`` and ``past`` packages with backports and forward +ports of features from Python 3 and 2. It also comes with ``futurize`` and +``pasteurize``, customized 2to3-based scripts that help you to convert +either Py2 or Py3 code easily to support both Python 2 and 3 in a single +clean Py3-style codebase, module by module. +The ``python-future`` project has been downloaded over 1.7 billion times. + +.. _status: + +Status +------ + +The ``python-future`` project was created in 2013 to attempt to save Python from +the schism of version incompatibility that was threatening to tear apart the +language (as Perl 6 contributed to the death of Perl). + +That time is now past. Thanks to a huge porting effort across the Python +community, Python 3 eventually thrived. 
Python 2 reached its end of life in +2020 and the ``python-future`` package should no longer be necessary. Use it to +help with porting legacy code to Python 3 but don't depend on it for new code. + +.. _features: Features -------- -- provides backports and remappings for 15 builtins with different - semantics on Py3 versus Py2 -- provides backports and remappings from the Py3 standard library -- 300+ unit tests -- ``futurize`` script based on ``2to3``, ``3to2`` and parts of - ``python-modernize`` for automatic conversion from either Py2 or Py3 to a - clean single-source codebase compatible with Python 2.6+ and Python 3.3+. -- a consistent set of utility functions and decorators selected from - Py2/3 compatibility interfaces from projects like ``six``, ``IPython``, - ``Jinja2``, ``Django``, and ``Pandas``. +- ``future.builtins`` package (also available as ``builtins`` on Py2) provides + backports and remappings for 20 builtins with different semantics on Py3 + versus Py2 + +- support for directly importing 30 standard library modules under + their Python 3 names on Py2 + +- support for importing the other 14 refactored standard library modules + under their Py3 names relatively cleanly via + ``future.standard_library`` and ``future.moves`` + +- ``past.builtins`` package provides forward-ports of 19 Python 2 types and + builtin functions. These can aid with per-module code migrations. + +- ``past.translation`` package supports transparent translation of Python 2 + modules to Python 3 upon import. [This feature is currently in alpha.] + +- 1000+ unit tests, including many from the Py3.3 source tree. + +- ``futurize`` and ``pasteurize`` scripts based on ``2to3`` and parts of + ``3to2`` and ``python-modernize``, for automatic conversion from either Py2 + or Py3 to a clean single-source codebase compatible with Python 2.6+ and + Python 3.3+. 
+- a curated set of utility functions and decorators in ``future.utils`` and + ``past.utils`` selected from Py2/3 compatibility interfaces from projects + like ``six``, ``IPython``, ``Jinja2``, ``Django``, and ``Pandas``. + +- support for the ``surrogateescape`` error handler when encoding and + decoding the backported ``str`` and ``bytes`` objects. [This feature is + currently in alpha.] + +- support for pre-commit hooks + +.. _code-examples: Code examples ------------- -``future`` is designed to be imported at the top of each Python module -together with Python's built-in ``__future__`` module like this:: +Replacements for Py2's built-in functions and types are designed to be imported +at the top of each Python module together with Python's built-in ``__future__`` +statements. For example, this code behaves identically on Python 2.6/2.7 after +these imports as it does on Python 3.3+: - from __future__ import (absolute_import, division, - print_function, unicode_literals) - from future import standard_library - from future.builtins import * - -followed by standard Python 3 code. The imports have no effect on Python -3 but allow the code to run mostly unchanged on Python 3 and Python 2.6/2.7. - -For example, this code behaves the same way on Python 2.6/2.7 after these -imports as it normally does on Python 3:: - - # Support for renamed standard library modules via import hooks - from http.client import HttpConnection - from itertools import filterfalse - import html.parser - import queue +.. 
code-block:: python + + from __future__ import absolute_import, division, print_function + from builtins import (bytes, str, open, super, range, + zip, round, input, int, pow, object) # Backported Py3 bytes object b = bytes(b'ABCD') @@ -65,15 +100,15 @@ imports as it normally does on Python 3:: assert s != bytes(b'ABCD') assert isinstance(s.encode('utf-8'), bytes) assert isinstance(b.decode('utf-8'), str) - assert repr(s) == 'ABCD' # consistent repr with Py3 (no u prefix) + assert repr(s) == "'ABCD'" # consistent repr with Py3 (no u prefix) # These raise TypeErrors: # bytes(b'B') in s # s.find(bytes(b'A')) # Extra arguments for the open() function f = open('japanese.txt', encoding='utf-8', errors='replace') - - # New simpler super() function: + + # New zero-argument super() function: class VerboseList(list): def append(self, item): print('Adding an item') @@ -82,52 +117,223 @@ imports as it normally does on Python 3:: # New iterable range object with slicing support for i in range(10**15)[:10]: pass - + # Other iterators: map, zip, filter my_iter = zip(range(3), ['a', 'b', 'c']) assert my_iter != list(my_iter) - + # The round() function behaves as it does in Python 3, using # "Banker's Rounding" to the nearest even last digit: assert round(0.1250, 2) == 0.12 - + # input() replaces Py2's raw_input() (with no eval()): name = input('What is your name? 
') print('Hello ' + name) + # pow() supports fractional exponents of negative numbers like in Py3: + z = pow(-1, 0.5) + # Compatible output from isinstance() across Py2/3: assert isinstance(2**64, int) # long integers assert isinstance(u'blah', str) - assert isinstance('blah', str) # with unicode_literals in effect - assert isinstance(b'bytestring', bytes) + assert isinstance('blah', str) # only if unicode_literals is in effect + # Py3-style iterators written as new-style classes (subclasses of + # future.types.newobject) are automatically backward compatible with Py2: + class Upper(object): + def __init__(self, iterable): + self._iter = iter(iterable) + def __next__(self): # note the Py3 interface + return next(self._iter).upper() + def __iter__(self): + return self + assert list(Upper('hello')) == list('HELLO') -Documentation -------------- -See http://python-future.org +There is also support for renamed standard library modules. The recommended +interface works like this: + +.. code-block:: python + + # Many Py3 module names are supported directly on both Py2.x and 3.x: + from http.client import HttpConnection + import html.parser + import queue + import xmlrpc.client + + # Refactored modules with clashing names on Py2 and Py3 are supported + # as follows: + from future import standard_library + standard_library.install_aliases() + + # Then, for example: + from itertools import filterfalse, zip_longest + from urllib.request import urlopen + from collections import ChainMap + from collections import UserDict, UserList, UserString + from subprocess import getoutput, getstatusoutput + from collections import Counter, OrderedDict # backported to Py2.6 + + +Automatic conversion to Py2/3-compatible code +--------------------------------------------- + +``python-future`` comes with two scripts called ``futurize`` and +``pasteurize`` to aid in making Python 2 code or Python 3 code compatible with +both platforms (Py2/3). 
It is based on 2to3 and uses fixers from ``lib2to3``, +``lib3to2``, and ``python-modernize``, as well as custom fixers. + +``futurize`` passes Python 2 code through all the appropriate fixers to turn it +into valid Python 3 code, and then adds ``__future__`` and ``future`` package +imports so that it also runs under Python 2. + +For conversions from Python 3 code to Py2/3, use the ``pasteurize`` script +instead. This converts Py3-only constructs (e.g. new metaclass syntax) to +Py2/3 compatible constructs and adds ``__future__`` and ``future`` imports to +the top of each module. + +In both cases, the result should be relatively clean Py3-style code that runs +mostly unchanged on both Python 2 and Python 3. + +Futurize: 2 to both +~~~~~~~~~~~~~~~~~~~ + +For example, running ``futurize -w mymodule.py`` turns this Python 2 code: + +.. code-block:: python + + import Queue + from urllib2 import urlopen + def greet(name): + print 'Hello', + print name -Credits -------- + print "What's your name?", + name = raw_input() + greet(name) -:Author: Ed Schofield -:Sponsor: Python Charmers Pty Ltd, Australia, and Python Charmers Pte - Ltd, Singapore. http://pythoncharmers.com -:Others: - The backported ``super()`` and ``range()`` functions are - derived from Ryan Kelly's ``magicsuper`` module and Dan - Crosta's ``xrange`` module. - - The ``futurize`` script uses ``lib2to3``, ``lib3to2``, and - parts of Armin Ronacher's ``python-modernize`` code. - - The ``python_2_unicode_compatible`` decorator is from - Django. The ``implements_iterator`` and ``with_metaclass`` - decorators are from Jinja2. - - Documentation is generated using ``sphinx`` using - ``sphinx_bootstrap_theme``. +into this code which runs on both Py2 and Py3: +.. 
code-block:: python + + from __future__ import print_function + from future import standard_library + standard_library.install_aliases() + from builtins import input + import queue + from urllib.request import urlopen + + def greet(name): + print('Hello', end=' ') + print(name) + + print("What's your name?", end=' ') + name = input() + greet(name) + +The first four lines have no effect under Python 3 and can be removed from +the codebase when Python 2 compatibility is no longer required. + +See :ref:`forwards-conversion` and :ref:`backwards-conversion` for more details. + + +Automatic translation +~~~~~~~~~~~~~~~~~~~~~ + +The ``past`` package can automatically translate some simple Python 2 +modules to Python 3 upon import. The goal is to support the "long tail" of +real-world Python 2 modules (e.g. on PyPI) that have not been ported yet. For +example, here is how to use a Python 2-only package called ``plotrique`` on +Python 3. First install it: + +.. code-block:: bash + + $ pip3 install plotrique==0.2.5-7 --no-compile # to ignore SyntaxErrors + +(or use ``pip`` if this points to your Py3 environment.) + +Then pass a whitelist of module name prefixes to the ``autotranslate()`` function. +Example: + +.. code-block:: bash + + $ python3 + + >>> from past.translation import autotranslate + >>> autotranslate(['plotrique']) + >>> import plotrique + +This transparently translates and runs the ``plotrique`` module and any +submodules in the ``plotrique`` package that ``plotrique`` imports. + +This is intended to help you migrate to Python 3 without the need for all +your code's dependencies to support Python 3 yet. It should be used as a +last resort; ideally Python 2-only dependencies should be ported +properly to a Python 2/3 compatible codebase using a tool like +``futurize`` and the changes should be pushed to the upstream project. + +Note: the auto-translation feature is still in alpha; it needs more testing and +development, and will likely never be perfect. 
+ + +Pre-commit hooks +~~~~~~~~~~~~~~~~ + +`Pre-commit <https://pre-commit.com/>`_ is a framework for managing and maintaining +multi-language pre-commit hooks. + +In case you need to port your project from Python 2 to Python 3, you might consider +using such a hook during the transition period. + +First: + +.. code-block:: bash + + $ pip install pre-commit + +and then in your project's directory: + +.. code-block:: bash + + $ pre-commit install + +Next, you need to add this entry to your ``.pre-commit-config.yaml`` + +.. code-block:: yaml + + - repo: https://github.com/PythonCharmers/python-future + rev: master + hooks: + - id: futurize + args: [--both-stages] + +The ``args`` part is optional, by default only stage1 is applied. Licensing --------- -Copyright 2013-2014 Python Charmers Pty Ltd, Australia. -The software is distributed under an MIT licence. See LICENSE.txt. +:Author: Ed Schofield, Jordan M. Adler, et al + +:Copyright: 2013-2024 Python Charmers, Australia. + +:Sponsors: Python Charmers: https://pythoncharmers.com + + Pinterest https://opensource.pinterest.com + +:Licence: MIT. See ``LICENSE.txt`` or `here <https://github.com/PythonCharmers/python-future/blob/master/LICENSE.txt>`_. + +:Other credits: See `here <https://python-future.org/credits.html>`_. + +Docs +---- +See the docs `here <https://python-future.org>`_. + +Next steps +---------- + +If you are new to Python-Future, check out the `Quickstart Guide +<https://python-future.org/quickstart.html>`_. + +For an update on changes in the latest version, see the `What's New +<https://python-future.org/whatsnew.html>`_ page. diff --git a/TESTING.txt b/TESTING.txt new file mode 100644 index 00000000..b2ad5c65 --- /dev/null +++ b/TESTING.txt @@ -0,0 +1,11 @@ +A docker image, python-future-builder, is used to do testing and building. 
The test suite can be run with: + + $ bash build.sh + +which tests the module under a number of different python versions, where available, or with: + + $ py.test + +To execute a single test: + + $ pytest -k test_chained_exceptions_stacktrace diff --git a/discover_tests.py b/discover_tests.py deleted file mode 100644 index 8734cca8..00000000 --- a/discover_tests.py +++ /dev/null @@ -1,51 +0,0 @@ -""" -Simple auto test discovery. - -From http://stackoverflow.com/a/17004409 -""" -import os -import sys -import unittest - -if not hasattr(unittest.defaultTestLoader, 'discover'): - import unittest2 as unittest - -def additional_tests(): - setup_file = sys.modules['__main__'].__file__ - setup_dir = os.path.abspath(os.path.dirname(setup_file)) - testsuite = unittest.defaultTestLoader.discover(setup_dir) - blacklist = [] - if '/home/travis' in __file__: - # Skip some tests that fail on travis-ci - blacklist.append('test_command') - return exclude_tests(testsuite, blacklist) - - -class SkipCase(unittest.TestCase): - def runTest(self): - raise unittest.SkipTest("Test fails spuriously on travis-ci") - - -def exclude_tests(suite, blacklist): - """ - Example: - - blacklist = [ - 'test_some_test_that_should_be_skipped', - 'test_another_test_that_should_be_skipped' - ] - """ - - new_suite = unittest.TestSuite() - - for test_group in suite._tests: - for test in test_group: - if not hasattr(test, '_tests'): - # e.g. 
ModuleImportFailure - continue - for subtest in test._tests: - method = subtest._testMethodName - if method in blacklist: - setattr(test, method, getattr(SkipCase(), 'runTest')) - new_suite.addTest(test) - return new_suite diff --git a/docs/3rd-party-py3k-compat-code/ipython_py3compat.py b/docs/3rd-party-py3k-compat-code/ipython_py3compat.py index f80a6963..c9fbb2c1 100755 --- a/docs/3rd-party-py3k-compat-code/ipython_py3compat.py +++ b/docs/3rd-party-py3k-compat-code/ipython_py3compat.py @@ -41,9 +41,9 @@ def wrapper(func_or_str): else: func = func_or_str doc = func.__doc__ - + doc = str_change_func(doc) - + if func: func.__doc__ = doc return func @@ -52,97 +52,97 @@ def wrapper(func_or_str): if sys.version_info[0] >= 3: PY3 = True - + input = input builtin_mod_name = "builtins" - + str_to_unicode = no_code unicode_to_str = no_code str_to_bytes = encode bytes_to_str = decode cast_bytes_py2 = no_code - + def isidentifier(s, dotted=False): if dotted: return all(isidentifier(a) for a in s.split(".")) return s.isidentifier() - + open = orig_open - + MethodType = types.MethodType - + def execfile(fname, glob, loc=None): loc = loc if (loc is not None) else glob exec compile(open(fname, 'rb').read(), fname, 'exec') in glob, loc - + # Refactor print statements in doctests. _print_statement_re = re.compile(r"\bprint (?P.*)$", re.MULTILINE) def _print_statement_sub(match): expr = match.groups('expr') return "print(%s)" % expr - + @_modify_str_or_docstring def doctest_refactor_print(doc): """Refactor 'print x' statements in a doctest to print(x) style. 2to3 unfortunately doesn't pick up on our doctests. 
- + Can accept a string or a function, so it can be used as a decorator.""" return _print_statement_re.sub(_print_statement_sub, doc) - + # Abstract u'abc' syntax: @_modify_str_or_docstring def u_format(s): """"{u}'abc'" --> "'abc'" (Python 3) - + Accepts a string or a function, so it can be used as a decorator.""" return s.format(u='') else: PY3 = False - + input = raw_input builtin_mod_name = "__builtin__" - + str_to_unicode = decode unicode_to_str = encode str_to_bytes = no_code bytes_to_str = no_code cast_bytes_py2 = cast_bytes - + import re _name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*$") def isidentifier(s, dotted=False): if dotted: return all(isidentifier(a) for a in s.split(".")) return bool(_name_re.match(s)) - + class open(object): """Wrapper providing key part of Python 3 open() interface.""" def __init__(self, fname, mode="r", encoding="utf-8"): self.f = orig_open(fname, mode) self.enc = encoding - + def write(self, s): return self.f.write(s.encode(self.enc)) - + def read(self, size=-1): return self.f.read(size).decode(self.enc) - + def close(self): return self.f.close() - + def __enter__(self): return self - + def __exit__(self, etype, value, traceback): self.f.close() - + def MethodType(func, instance): return types.MethodType(func, instance, type(instance)) - + # don't override system execfile on 2.x: execfile = execfile - + def doctest_refactor_print(func_or_str): return func_or_str @@ -151,7 +151,7 @@ def doctest_refactor_print(func_or_str): @_modify_str_or_docstring def u_format(s): """"{u}'abc'" --> "u'abc'" (Python 2) - + Accepts a string or a function, so it can be used as a decorator.""" return s.format(u='u') diff --git a/docs/3rd-party-py3k-compat-code/jinja2_compat.py b/docs/3rd-party-py3k-compat-code/jinja2_compat.py index 1326cbc6..0456faae 100644 --- a/docs/3rd-party-py3k-compat-code/jinja2_compat.py +++ b/docs/3rd-party-py3k-compat-code/jinja2_compat.py @@ -85,7 +85,7 @@ def encode_filename(filename): def with_metaclass(meta, *bases): 
# This requires a bit of explanation: the basic idea is to make a - # dummy metaclass for one level of class instanciation that replaces + # dummy metaclass for one level of class instantiation that replaces # itself with the actual metaclass. Because of internal type checks # we also need to make sure that we downgrade the custom metaclass # for one level to something closer to type (that's why __call__ and diff --git a/docs/3rd-party-py3k-compat-code/pandas_py3k.py b/docs/3rd-party-py3k-compat-code/pandas_py3k.py index 6070c0e9..2a8eb5ae 100755 --- a/docs/3rd-party-py3k-compat-code/pandas_py3k.py +++ b/docs/3rd-party-py3k-compat-code/pandas_py3k.py @@ -14,7 +14,7 @@ * Uses the original method if available, otherwise uses items, keys, values. * types: * text_type: unicode in Python 2, str in Python 3 - * binary_type: str in Python 2, bythes in Python 3 + * binary_type: str in Python 2, bytes in Python 3 * string_types: basestring in Python 2, str in Python 3 * bind_method: binds functions to classes diff --git a/docs/_static/python-future-icon-32.ico b/docs/_static/python-future-icon-32.ico new file mode 100644 index 00000000..e3f2cf7f Binary files /dev/null and b/docs/_static/python-future-icon-32.ico differ diff --git a/docs/_static/python-future-icon-white-32.ico b/docs/_static/python-future-icon-white-32.ico new file mode 100644 index 00000000..3fa3dab9 Binary files /dev/null and b/docs/_static/python-future-icon-white-32.ico differ diff --git a/docs/_static/python-future-logo-textless-transparent.png b/docs/_static/python-future-logo-textless-transparent.png new file mode 100644 index 00000000..95ba682d Binary files /dev/null and b/docs/_static/python-future-logo-textless-transparent.png differ diff --git a/docs/_templates/layout.html b/docs/_templates/layout.html index 09cdb2d2..c979ab2d 100644 --- a/docs/_templates/layout.html +++ b/docs/_templates/layout.html @@ -1,21 +1,28 @@ {# Import the theme's layout. 
#} {% extends "!layout.html" %} -{# Add some extra stuff before and use existing with 'super()' call. #} -{% block footer %} +{% block extrahead %} {{ super() }} - +{% endblock %} - +{% block footer %} +{{ super() }} + {% endblock %} diff --git a/docs/_templates/navbar.html b/docs/_templates/navbar.html index 7edada2a..fc96b5ca 100644 --- a/docs/_templates/navbar.html +++ b/docs/_templates/navbar.html @@ -12,7 +12,6 @@ - {% if theme_navbar_title -%}{{ theme_navbar_title|e }}{%- else -%}{{ project|e }}{%- endif -%} {{ version|e }} diff --git a/docs/_templates/sidebarintro.html b/docs/_templates/sidebarintro.html index 296f74b1..25325ec3 100644 --- a/docs/_templates/sidebarintro.html +++ b/docs/_templates/sidebarintro.html @@ -1,6 +1,7 @@ - Tools and help for an easier, safer, cleaner upgrade path to Python 3. +

Easy, clean, reliable Python 2/3 compatibility

+ Table of Contents -

Useful Links

+ diff --git a/docs/_templates/sidebartoc.html b/docs/_templates/sidebartoc.html new file mode 100644 index 00000000..629fb6a1 --- /dev/null +++ b/docs/_templates/sidebartoc.html @@ -0,0 +1 @@ +{{ toctree(maxdepth=2, collapse=True, includehidden=True) }} diff --git a/docs/_themes/future/static/future.css_t b/docs/_themes/future/static/future.css_t index 6130f5c3..593da466 100644 --- a/docs/_themes/future/static/future.css_t +++ b/docs/_themes/future/static/future.css_t @@ -14,11 +14,11 @@ {% set sidebar_width = '220px' %} {% set font_family = 'Geneva, sans serif' %} {% set header_font_family = 'Oxygen, ' ~ font_family %} - + @import url("basic.css"); - + /* -- page layout ----------------------------------------------------------- */ - + body { font-family: {{ font_family }}; font-size: 17px; @@ -49,7 +49,7 @@ div.sphinxsidebar { hr { border: 1px solid #B1B4B6; } - + div.body { background-color: #ffffff; color: #3E4349; @@ -60,7 +60,7 @@ img.floatingflask { padding: 0 0 10px 10px; float: right; } - + div.footer { width: {{ page_width }}; margin: 20px auto 30px auto; @@ -76,7 +76,7 @@ div.footer a { div.related { display: none; } - + div.sphinxsidebar a { color: #444; text-decoration: none; @@ -86,7 +86,7 @@ div.sphinxsidebar a { div.sphinxsidebar a:hover { border-bottom: 1px solid #999; } - + div.sphinxsidebar { font-size: 15px; line-height: 1.5; @@ -101,7 +101,7 @@ div.sphinxsidebarwrapper p.logo { margin: 0; text-align: center; } - + div.sphinxsidebar h3, div.sphinxsidebar h4 { font-family: {{ font_family }}; @@ -115,7 +115,7 @@ div.sphinxsidebar h4 { div.sphinxsidebar h4 { font-size: 20px; } - + div.sphinxsidebar h3 a { color: #444; } @@ -126,7 +126,7 @@ div.sphinxsidebar p.logo a:hover, div.sphinxsidebar h3 a:hover { border: none; } - + div.sphinxsidebar p { color: #555; margin: 10px 0; @@ -137,7 +137,7 @@ div.sphinxsidebar ul { padding: 0; color: #000; } - + div.sphinxsidebar input { border: 1px solid #ccc; font-family: {{ font_family }}; @@ -147,19 +147,19 @@ 
div.sphinxsidebar input { div.sphinxsidebar form.search input[name="q"] { width: 130px; } - + /* -- body styles ----------------------------------------------------------- */ - + a { color: #aa0000; text-decoration: underline; } - + a:hover { color: #dd0000; text-decoration: underline; } - + div.body h1, div.body h2, div.body h3, @@ -172,25 +172,25 @@ div.body h6 { padding: 0; color: black; } - + div.body h1 { margin-top: 0; padding-top: 0; font-size: 240%; } div.body h2 { font-size: 180%; } div.body h3 { font-size: 150%; } div.body h4 { font-size: 130%; } div.body h5 { font-size: 100%; } div.body h6 { font-size: 100%; } - + a.headerlink { color: #ddd; padding: 0 4px; text-decoration: none; } - + a.headerlink:hover { color: #444; background: #eaeaea; } - + div.body p, div.body dd, div.body li { line-height: 1.4em; } @@ -237,20 +237,20 @@ div.note { background-color: #eee; border: 1px solid #ccc; } - + div.seealso { background-color: #ffc; border: 1px solid #ff6; } - + div.topic { background-color: #eee; } - + p.admonition-title { display: inline; } - + p.admonition-title:after { content: ":"; } @@ -344,7 +344,7 @@ ul, ol { margin: 10px 0 10px 30px; padding: 0; } - + pre { background: #eee; padding: 7px 30px; @@ -361,7 +361,7 @@ dl dl pre { margin-left: -90px; padding-left: 90px; } - + tt { background-color: #E8EFF0; color: #222; diff --git a/docs/automatic_conversion.rst b/docs/automatic_conversion.rst index fe7f18c0..5c718da5 100644 --- a/docs/automatic_conversion.rst +++ b/docs/automatic_conversion.rst @@ -1,294 +1,29 @@ .. _automatic-conversion: -Automatic conversion with ``futurize`` -====================================== +Automatic conversion to Py2/3 +============================= -The ``future`` source tree includes a script called ``futurize`` to aid in -making either Python 2 code or Python 3 code compatible with both platforms -using the :mod:`future` module. It is based on 2to3 and uses fixers from -``lib2to3``, ``lib3to2``, and ``python-modernize``. 
+The ``future`` source tree includes scripts called ``futurize`` and +``pasteurize`` to aid in making Python 2 code or Python 3 code compatible with +both platforms (Py2/3) using the :mod:`future` module. These are based on +``lib2to3`` and use fixers from ``2to3``, ``3to2``, and ``python-modernize``. -For Python 2 code (the default), it runs the code through all the appropriate -2to3 fixers to turn it into valid Python 3 code, and then adds ``__future__`` -and ``future`` package imports. +``futurize`` passes Python 2 code through all the appropriate fixers to turn it +into valid Python 3 code, and then adds ``__future__`` and ``future`` package +imports. -For conversions from Python 3 code (with the ``--from3`` command-line option), -it fixes Py3-only syntax (e.g. metaclasses) and adds ``__future__`` and -``future`` imports to the top of each module. +For conversions from Python 3 code to Py2/3, use the ``pasteurize`` script +instead. This converts Py3-only constructs (e.g. new metaclass syntax) and adds +``__future__`` and ``future`` imports to the top of each module. -In both cases, the result should be relatively clean Py3-style code -that runs mostly unchanged on both Python 2 and Python 3. +In both cases, the result should be relatively clean Py3-style code that runs +mostly unchanged on both Python 2 and Python 3. -.. _forwards-conversion: -Forwards: 2 to both --------------------- +.. include:: futurize.rst -For example, running ``futurize`` turns this Python 2 code:: - - import ConfigParser - - class Blah(object): - pass - print 'Hello', - -into this code which runs on both Py2 and Py3:: - - from __future__ import print_function - from future import standard_library - - import configparser - - class Blah(object): - pass - print('Hello', end=' ') - - -To write out all the changes to your Python files that ``futurize`` suggests, use the ``-w`` flag. - -For complex projects, it may be better to divide the porting into two stages. 
Stage 1 is for "safe" changes that modernize the code but do not break Python 2.6 compatibility or introduce a depdendency on the ``future`` package. Stage 2 is to complete the process. - - -.. _forwards-conversion-stage1: - -Stage 1: "safe" fixes -~~~~~~~~~~~~~~~~~~~~~ - -Run with:: - - futurize --stage1 - -This applies fixes that modernize Python 2 code without changing the effect of -the code. With luck, this will not introduce any bugs into the code, or will at -least be trivial to fix. The changes are those that bring the Python code -up-to-date without breaking Py2 compatibility. The resulting code will be -modern Python 2.6-compatible code plus ``__future__`` imports from the -following set:: - - from __future__ import absolute_import - from __future__ import division - from __future__ import print_function - -Only those ``__future__`` imports deemed necessary will be added unless -the ``--all-imports`` command-line option is passed to ``futurize``, in -which case they are all added. - -The ``from __future__ import unicode_literals`` declaration is not added -during stage 1. - -The changes include:: - - - except MyException, e: - + except MyException as e: - - - print >>stderr, "Blah" - + from __future__ import print_function - + print("Blah", stderr) - -Implicit relative imports fixed, e.g.:: - - - import mymodule - + from __future__ import absolute_import - + from . import mymodule - -.. and all unprefixed string literals '...' gain a b prefix to be b'...'. - -.. (This last step can be prevented using --no-bytes-literals if you already have b'...' markup in your code, whose meaning would otherwise be lost.) - -Stage 1 does not add any imports from the ``future`` package. The output of -stage 1 will probably not (yet) run on Python 3. - -The goal for this stage is to create most of the ``diff`` for the entire -porting process, but without introducing any bugs. It should be uncontroversial -and safe to apply to every Python 2 package. 
The subsequent patches introducing -Python 3 compatibility should then be shorter and easier to review. - - -.. _forwards-conversion-stage2: - -Stage 2: Py3-style code with ``future`` wrappers for Py2 -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Run with:: - - futurize —-stage2 myfolder/*.py - -This stage adds a dependency on the ``future`` package. The goal for stage 2 -is to make further mostly safe changes to the Python 2 code to use Python -3-style code that then still runs on Python 2 with the help of the appropriate -builtins and utilities in ``future``. - -For example:: - - name = raw_input('What is your name?\n') - - for k, v in d.iteritems(): - assert isinstance(v, basestring) - - class MyClass(object): - def __unicode__(self): - return u'My object' - def __str__(self): - return unicode(self).encode('utf-8') - -would be converted by Stage 2 to this code:: - - from future.builtins import input - from future.builtins import str - from future.utils import iteritems, python_2_unicode_compatible - - name = input('What is your name?\n') - - for k, v in iteritems(d): - assert isinstance(v, (str, bytes)) - - @python_2_unicode_compatible - class MyClass(object): - def __str__(self): - return u'My object' - -Stage 2 also renames standard-library imports to their Py3 names and adds this import:: - - from future import standard_library - -For example:: - - import ConfigParser - import - -All strings are then unicode (on Py2 as on Py3) unless explicitly marked with a ``b''`` prefix. - -Ideally the output of this stage should not be a ``SyntaxError`` on either -Python 3 or Python 2. - -After this, you can run your tests on Python 3 and make further code changes -until they pass on Python 3. - -The next step would be manually adding wrappers from ``future`` to re-enable -Python 2 compatibility. See :ref:`what-else` for more info. - - - -.. 
_forwards-conversion-text: - -Separating text from bytes -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -After applying stage 2, the recommended step is to decide which of your Python -2 strings represent binary data and to prefix all byte-string literals for binary -data with ``b`` like ``b'\x00ABCD'``. - -After stage 2 conversion, all string literals for textual data without ``b`` -prefixes will use Python 3's ``str`` type (or the backported ``str`` object -from ``future`` on Python 2). - - -.. _forwards-conversion-stage3: - -Stage 3: Py3 code with ``future`` wrappers for Py2 -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The goal for this stage is to get the tests passing first on Py3 and then on Py2 -again with the help of the ``future`` package. - -Run with:: - - futurize —-stage2 myfolder/*.py - -This adds three further imports:: - - from __future__ import unicode_literals - from future import standard_library - from future.builtins import * - -to each module and makes other changes needed to support Python 3, such as -renaming standard-library imports to their Py3 names. - -All strings are then unicode (on Py2 as on Py3) unless explicitly marked with a ``b''`` prefix. - -Ideally the output of this stage should not be a ``SyntaxError`` on either -Python 3 or Python 2. - -After this, you can run your tests on Python 3 and make further code changes -until they pass on Python 3. - -The next step would be manually adding wrappers from ``future`` to re-enable -Python 2 compatibility. See :ref:`what-else` for more info. - - -.. 
_backwards-conversion: - -Backwards: 3 to both --------------------- - -Running ``futurize --from3`` turns this Python 3 code:: - - import configparser - - class Blah: - pass - print('Hello', end=None) - -into this code which runs on both Py2 and Py3:: - - from __future__ import print_function - from future import standard_library - - import configparser - - class Blah(object): - pass - print('Hello', end=None) - -Notice that in both this case and when converting from Py2 above, -``futurize`` creates a new-style class on both Python versions and -imports the renamed stdlib module under its Py3 name. - -``futurize --from3`` also handles the following Python 3 features: - -- keyword-only arguments -- metaclasses (using :func:`~future.utils.with_metaclass`) -- extended tuple unpacking (PEP 3132) - -To handle function annotations (PEP 3107), see -:ref:`func_annotations`. - - -How well does ``futurize`` work? --------------------------------- - -It is still incomplete and makes mistakes, like 2to3, on which it is -based. - -Nevertheless, ``futurize`` is useful to automate much of the work -of porting, particularly the boring repetitive text substitutions. It -also helps to flag which parts of the code require attention. - -Please report bugs on `GitHub -`_. - -Contributions to ``futurize`` are particularly welcome! Please see :ref:`contributing`. - - -.. _futurize-limitations - -Known limitations of ``futurize`` ---------------------------------- - -``futurize`` doesn't currently make any of these changes automatically:: - -1. A source encoding declaration line like:: - - # -*- coding:utf-8 -*- - - is not kept at the top of a file. It must be moved manually back to line 1 to take effect. - -2. Strings containing ``\U`` produce a ``SyntaxError`` on Python 3. An example is:: - - s = 'C:\Users'. - - Python 2 expands this to ``s = 'C:\\Users'``, but Python 3 requires a raw - prefix (``r'...'``). This also applies to multi-line strings (including - multi-line docstrings). 
+.. include:: futurize_cheatsheet.rst +.. include:: pasteurize.rst +.. include:: conversion_limitations.rst diff --git a/docs/bind_method.rst b/docs/bind_method.rst index 7eb91a43..d737384c 100644 --- a/docs/bind_method.rst +++ b/docs/bind_method.rst @@ -9,10 +9,10 @@ from the language. To bind a method to a class compatibly across Python 3 and Python 2, you can use the :func:`bind_method` helper function:: from future.utils import bind_method - + class Greeter(object): pass - + def greet(self, message): print(message) @@ -24,6 +24,6 @@ from the language. To bind a method to a class compatibly across Python On Python 3, calling ``bind_method(cls, name, func)`` is equivalent to calling ``setattr(cls, name, func)``. On Python 2 it is equivalent to:: - + import types setattr(cls, name, types.MethodType(func, None, cls)) diff --git a/docs/bytes_object.rst b/docs/bytes_object.rst index e2c00c9c..110280ad 100644 --- a/docs/bytes_object.rst +++ b/docs/bytes_object.rst @@ -11,11 +11,10 @@ Python 2's :class:`str`, rather than a true implementation of the Python :mod:`future` contains a backport of the :mod:`bytes` object from Python 3 which passes most of the Python 3 tests for :mod:`bytes`. (See -``future/tests/test_bytes.py`` in the source tree.) You can use it as +``tests/test_future/test_bytes.py`` in the source tree.) You can use it as follows:: - >>> from future.builtins import bytes - + >>> from builtins import bytes >>> b = bytes(b'ABCD') On Py3, this is simply the builtin :class:`bytes` object. 
On Py2, this @@ -27,23 +26,20 @@ strict separation of unicode strings and byte strings as Python 3's Traceback (most recent call last): File "", line 1, in TypeError: argument can't be unicode string - + >>> bytes(b',').join([u'Fred', u'Bill']) Traceback (most recent call last): File "", line 1, in TypeError: sequence item 0: expected bytes, found unicode string -Various other comparisons between byte-strings and other types return a result -in Py2 but raise a TypeError in Py3. For example, this is permissible on -Py2:: - - >>> b'u' > 10 - True + >>> b == u'ABCD' + False - >>> b'u' <= u'u' - True + >>> b < u'abc' + Traceback (most recent call last): + File "", line 1, in + TypeError: unorderable types: bytes() and -On Py3, these raise TypeErrors. In most other ways, these :class:`bytes` objects have identical behaviours to Python 3's :class:`bytes`:: @@ -51,14 +47,14 @@ behaviours to Python 3's :class:`bytes`:: b = bytes(b'ABCD') assert list(b) == [65, 66, 67, 68] assert repr(b) == "b'ABCD'" - assert b.split(b'b') == [b'A', b'CD'] + assert b.split(b'B') == [b'A', b'CD'] Currently the easiest way to ensure identical behaviour of byte-strings in a Py2/3 codebase is to wrap all byte-string literals ``b'...'`` in a :func:`~bytes` call as follows:: - - from future.builtins import * - + + from builtins import bytes + # ... b = bytes(b'This is my bytestring') @@ -70,26 +66,15 @@ code incompatibilities caused by the many differences between Py3 bytes and Py2 strings. -.. - .. _bytes-test-results: - - bytes test results - ~~~~~~~~~~~~~~~~~~ - - For reference, when using Py2's default :class:`bytes` (i.e. 
- :class:`str`), running the ``bytes`` unit tests from Python 3.3's - ``test_bytes.py`` on Py2 (after fixing imports) gives this:: - - -------------------------------------------------------------- - Ran 203 tests in 0.209s - - FAILED (failures=31, errors=55, skipped=1) - -------------------------------------------------------------- - - Using :mod:`future`'s backported :class:`bytes` object passes most of - the same Python 3.3 tests on Py2, except those requiring specific - wording in exception messages. - - See ``future/tests/test_bytes.py`` in the source for the actual set - of unit tests that are actually run. +The :class:`bytes` type from :mod:`builtins` also provides support for the +``surrogateescape`` error handler on Python 2.x. Here is an example that works +identically on Python 2.x and 3.x:: + + >>> from builtins import bytes + >>> b = bytes(b'\xff') + >>> b.decode('utf-8', 'surrogateescape') + '\udcc3' +This feature is in alpha. Please leave feedback `here +`_ about whether this +works for you. diff --git a/docs/changelog.rst b/docs/changelog.rst index b96890eb..420e2bc4 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,76 +1,974 @@ -What's new -********** +.. _whats-old: + +Changes in previous versions +**************************** + +Changes in the most recent major version are here: :ref:`whats-new`. + +Changes in version 0.18.3 (2023-01-13) +====================================== +This is a minor bug-fix release containing a number of fixes: + +- Backport fix for bpo-38804 (c91d70b) +- Fix bug in fix_print.py fixer (dffc579) +- Fix bug in fix_raise.py fixer (3401099) +- Fix newint bool in py3 (fe645ba) +- Fix bug in super() with metaclasses (6e27aac) +- docs: fix simple typo, reqest -> request (974eb1f) +- Correct __eq__ (c780bf5) +- Pass if lint fails (2abe00d) +- Update docker image and parcel out to constant variable. 
Add comment to update version constant (45cf382) +- fix order (f96a219) +- Add flake8 to image (046ff18) +- Make lint.sh executable (58cc984) +- Add docker push to optimize CI (01e8440) +- Build System (42b3025) +- Add docs build status badge to README.md (3f40bd7) +- Use same docs requirements in tox (18ecc5a) +- Add docs/requirements.txt (5f9893f) +- Add PY37_PLUS, PY38_PLUS, and PY39_PLUS (bee0247) +- fix 2.6 test, better comment (ddedcb9) +- fix 2.6 test (3f1ff7e) +- remove nan test (4dbded1) +- include list test values (e3f1a12) +- fix other python2 test issues (c051026) +- fix missing subTest (f006cad) +- import from old imp library on older python versions (fc84fa8) +- replace fstrings with format for python 3.4,3.5 (4a687ea) +- minor style/spelling fixes (8302d8c) +- improve cmp function, add unittest (0d95a40) +- Pin typing==3.7.4.1 for Python 3.3 compatiblity (1a48f1b) +- Fix various py26 unit test failures (9ca5a14) +- Add initial contributing guide with docs build instruction (e55f915) +- Add docs building to tox.ini (3ee9e7f) +- Support NumPy's specialized int types in builtins.round (b4b54f0) +- Added r""" to the docstring to avoid warnings in python3 (5f94572) +- Add __subclasscheck__ for past.types.basestring (c9bc0ff) +- Correct example in README (681e78c) +- Add simple documentation (6c6e3ae) +- Add pre-commit hooks (a9c6a37) +- Handling of __next__ and next by future.utils.get_next was reversed (52b0ff9) +- Add a test for our fix (461d77e) +- Compare headers to correct definition of str (3eaa8fd) +- #322 Add support for negative ndigits in round; additionally, fixing a bug so that it handles passing in Decimal properly (a4911b9) +- Add tkFileDialog to future.movers.tkinter (f6a6549) +- Sort before comparing dicts in TestChainMap (6126997) +- Fix typo (4dfa099) +- Fix formatting in "What's new" (1663dfa) +- Fix typo (4236061) +- Avoid DeprecationWarning caused by invalid escape (e4b7fa1) +- Fixup broken link to external django documentation re: 
porting to Python 3 and unicode_literals (d87713e) +- Fixed newdict checking version every time (99030ec) +- Add count from 2.7 to 2.6 (1b8ef51) + +Changes in version 0.18.2 (2019-10-30) +====================================== + +This is a minor bug-fix release containing a number of fixes: + +- Fix min/max functions with generators, and 'None' default (PR #514) +- Use BaseException in raise_() (PR #515) +- Fix builtins.round() for Decimals (Issue #501) +- Fix raise_from() to prevent failures with immutable classes (PR #518) +- Make FixInput idempotent (Issue #427) +- Fix type in newround (PR #521) +- Support mimetype guessing in urllib2 for Py3.8+ (Issue #508) + +Python 3.8 is not yet officially supported. + +Changes in version 0.18.1 (2019-10-09) +====================================== + +This is a minor bug-fix release containing a fix for raise_() +when passed an exception that's not an Exception (e.g. BaseException +subclasses) + +Changes in version 0.18.0 (2019-10-09) +====================================== + +This is a major bug-fix and feature release, including: + +- Fix collections.abc import for py38+ +- Remove import for isnewbytes() function, reducing CPU cost significantly +- Fix bug with importing past.translation when importing past which breaks zipped python installations +- Fix an issue with copyreg import under Py3 that results in unexposed stdlib functionality +- Export and document types in future.utils +- Update behavior of newstr.__eq__() to match str.__eq__() as per reference docs +- Fix raising and the raising fixer to handle cases where the syntax is ambiguous +- Allow "default" parameter in min() and max() (Issue #334) +- Implement __hash__() in newstr (Issue #454) +- Future proof some version checks to handle the fact that Py4 won't be a major breaking release +- Fix urllib.request imports for Python 3.8 compatibility (Issue #447) +- Fix future import ordering (Issue #445) +- Fixed bug in fix_division_safe fixture (Issue #434) +- Do not 
globally destroy re.ASCII in PY3 +- Fix a bug in email.Message.set_boundary() (Issue #429) +- Implement format_map() in str +- Implement readinto() for socket.fp + +As well as a number of corrections to a variety of documentation, and updates to +test infrastructure. + +Changes in version 0.17.1 (2018-10-30) +====================================== + +This release address a packaging error because of an erroneous declaration that +any built wheels are universal. + +Changes in version 0.17.0 (2018-10-19) +====================================== + +This is a major bug-fix release, including: + +- Fix ``from collections import ChainMap`` after install_aliases() (issue #226) +- Fix multiple import from ``__future__`` bug in futurize (issue #113) +- Add support for proper %s formatting of newbytes +- Properly implement iterator protocol for newrange object +- Fix ``past.translation`` on read-only file systems +- Fix Tkinter import bug introduced in Python 2.7.4 (issue #262) +- Correct TypeError to ValueError in a specific edge case for newrange +- Support inequality tests between newstrs and newbytes +- Add type check to __get__ in newsuper +- Fix fix_divsion_safe to support better conversion of complex expressions, and + skip obvious float division. + +As well as a number of corrections to a variety of documentation, and updates to +test infrastructure. + +Changes in version 0.16.0 (2016-10-27) +====================================== + +This release removes the ``configparser`` package as an alias for +``ConfigParser`` on Py2 to improve compatibility with the backported +`configparser package `. Previously +``python-future`` and the PyPI ``configparser`` backport clashed, causing +various compatibility issues. (Issues #118, #181) + +If your code previously relied on ``configparser`` being supplied by +``python-future``, the recommended upgrade path is to run ``pip install +configparser`` or add ``configparser`` to your ``requirements.txt`` file. 
+ +Note that, if you are upgrading ``future`` with ``pip``, you may need to +uninstall the old version of future or manually remove the +``site-packages/future-0.15.2-py2.7.egg`` folder for this change to take +effect on your system. + +This release also fixes these bugs: + +- Fix ``newbytes`` constructor bug. (Issue #171) +- Fix semantics of ``bool()`` with ``newobject``. (Issue #211) +- Fix ``standard_library.install_aliases()`` on PyPy. (Issue #205) +- Fix assertRaises for ``pow`` and ``compile`` on Python 3.5. (Issue #183) +- Fix return argument of ``future.utils.ensure_new_type`` if conversion to + new type does not exist. (Issue #185) +- Add missing ``cmp_to_key`` for Py2.6. (Issue #189) +- Allow the ``old_div`` fixer to be disabled. (Issue #190) +- Improve compatibility with Google App Engine. (Issue #231) +- Add some missing imports to the ``tkinter`` and ``tkinter.filedialog`` + package namespaces. (Issues #212 and #233) +- More complete implementation of ``raise_from`` on PY3. (Issues #141, + #213 and #235, fix provided by Varriount) + + +Changes in version 0.15.2 (2015-09-11) +====================================== + +This is a minor bug-fix release: + +- Fix ``socket.create_connection()`` backport on Py2.6 (issue #162) +- Add more tests of ``urllib.request`` etc. +- Fix ``newsuper()`` calls from the ``__init__`` method of PyQt subclasses + (issue #160, thanks to Christopher Arndt) + +Changes in version 0.15.1 (2015-09-09) +====================================== + +This is a minor bug-fix release: + +- Use 3-argument ``socket.create_connection()`` backport to restore Py2.6 + compatibility in ``urllib.request.urlopen()`` (issue #162) +- Remove breakpoint in ``future.backports.http.client`` triggered on certain + data (issue #164) +- Move ``exec`` fixer to stage 1 of ``futurize`` because the forward-compatible ``exec(a, b)`` + idiom is supported in Python 2.6 and 2.7. See + https://docs.python.org/2/reference/simple_stmts.html#exec.
+ + +Changes in version 0.15.0 (2015-07-25) +====================================== + +This release fixes compatibility bugs with CherryPy's Py2/3 compat layer and +the latest version of the ``urllib3`` package. It also adds some additional +backports for Py2.6 and Py2.7 from Py3.4's standard library. + +New features: + +- ``install_aliases()`` now exposes full backports of the Py3 urllib submodules + (``parse``, ``request`` etc.) from ``future.backports.urllib`` as submodules + of ``urllib`` on Py2. This implies, for example, that + ``urllib.parse.unquote`` now takes an optional encoding argument as it does + on Py3. This improves compatibility with CherryPy's Py2/3 compat layer (issue + #158). +- ``tkinter.ttk`` support (issue #151) +- Backport of ``collections.ChainMap`` (issue #150) +- Backport of ``itertools.count`` for Py2.6 (issue #152) +- Enable and document support for the ``surrogateescape`` error handler for ``newstr`` and ``newbytes`` objects on Py2.x (issue #116). This feature is currently in alpha. +- Add constants to ``http.client`` such as ``HTTP_PORT`` and ``BAD_REQUEST`` (issue #137) +- Backport of ``reprlib.recursive_repr`` to Py2 + +Bug fixes: + +- Add ``HTTPMessage`` to ``http.client``, which is missing from ``httplib.__all__`` on Python <= 2.7.10. This restores compatibility with the latest ``urllib3`` package (issue #159, thanks to Waldemar Kornewald) +- Expand newint.__divmod__ and newint.__rdivmod__ to fall back to + implementations where appropriate (issue #146 - thanks to Matt Bogosian) +- Fix newrange slicing for some slice/range combos (issue #132, thanks to Brad Walker) +- Small doc fixes (thanks to Michael Joseph and Tim Tröndle) +- Improve robustness of test suite against opening .pyc files as text on Py2 +- Update backports of ``Counter`` and ``OrderedDict`` to use the newer + implementations from Py3.4. This fixes ``.copy()`` preserving subclasses etc. 
+- ``futurize`` no longer breaks working Py2 code by changing ``basestring`` to + ``str``. Instead it imports the ``basestring`` forward-port from + ``past.builtins`` (issues #127 and #156) +- ``future.utils``: add ``string_types`` etc. and update docs (issue #126) -.. whats-new-0.11: +.. _whats-new-0.14.x: -What's new in version 0.11 -========================== +Changes in version 0.14.3 (2014-12-15) +====================================== + +This is a bug-fix release: + +- Expose contents of ``thread`` (not ``dummy_thread``) as ``_thread`` on Py2 (Issue #124) +- Add signed support for ``newint.to_bytes()`` (Issue #128) +- Fix ``OrderedDict.clear()`` on Py2.6 (Issue #125) +- Improve ``newrange``: equality and slicing, start/stop/step properties, refactoring (Issues #129, #130) +- Minor doc updates + +Changes in version 0.14.2 (2014-11-21) +====================================== -More robust implementation of standard_library hooks ----------------------------------------------------- +This is a bug-fix release: -``future.standard_library`` now no longer installs import hooks by default. -These were bleeding into surrounding code, causing incompatibilities with -modules like ``requests`` (issue #19). 
+- Speed up importing of ``past.translation`` (Issue #117) +- ``html.escape()``: replace function with the more robust one from Py3.4 +- ``futurize``: avoid displacing encoding comments by ``__future__`` imports (Issues #97, #10, #121) +- ``futurize``: don't swallow exit code (Issue #119) +- Packaging: don't forcibly remove the old build dir in ``setup.py`` (Issue #108) +- Docs: update further docs and tests to refer to ``install_aliases()`` instead of + ``install_hooks()`` +- Docs: fix ``iteritems`` import error in cheat sheet (Issue #120) +- Tests: don't rely on presence of ``test.test_support`` on Py2 or ``test.support`` on Py3 (Issue #109) +- Tests: don't override existing ``PYTHONPATH`` for tests (PR #111) -Now ``future.standard_library`` provides the context manager -``enable_hooks()``. Use it as follows:: +Changes in version 0.14.1 (2014-10-02) +====================================== - >>> from future import standard_library - >>> with standard_library.enable_hooks(): +This is a minor bug-fix release: + +- Docs: add a missing template file for building docs (Issue #108) +- Tests: fix a bug in error handling while reporting failed script runs (Issue #109) +- ``install_aliases()``: don't assume that the ``test.test_support`` module always + exists on Py2 (Issue #109) + + +Changes in version 0.14.0 (2014-10-02) +====================================== + +This is a major new release that offers a cleaner interface for most imports in +Python 2/3 compatible code. + +Instead of this interface:: + + >>> from future.builtins import str, open, range, dict + + >>> from future.standard_library import hooks + >>> with hooks(): ... import queue - ... import socketserver - ... from http.client import HTTPConnection - >>> import requests - >>> # etc. + ... import configparser + ... import tkinter.dialog + ... # etc. 
+ +You can now use the following interface for much Python 2/3 compatible code:: + + >>> # Alias for future.builtins on Py2: + >>> from builtins import str, open, range, dict + + >>> # Alias for future.moves.* on Py2: + >>> import queue + >>> import configparser + >>> import tkinter.dialog + >>> etc. + +Notice that the above code will run on Python 3 even without the presence of the +``future`` package. Of the 44 standard library modules that were refactored with +PEP 3108, 30 are supported with direct imports in this manner. (These are listed +here: :ref:`direct-imports`.) + +The other 14 standard library modules that kept the same top-level names in +Py3.x are not supported with this direct import interface on Py2. These include +the 5 modules in the Py3 ``urllib`` package. These modules are accessible through +the following interface (as well as the interfaces offered in previous versions +of ``python-future``):: + + from future.standard_library import install_aliases + install_aliases() + + from collections import UserDict, UserList, UserString + import dbm.gnu + from itertools import filterfalse, zip_longest + from subprocess import getoutput, getstatusoutput + from sys import intern + import test.support + from urllib.request import urlopen + from urllib.parse import urlparse + # etc. + from collections import Counter, OrderedDict # backported to Py2.6 + +The complete list of packages supported with this interface is here: +:ref:`list-standard-library-refactored`. + +For more information on these and other interfaces to the standard library, see +:ref:`standard-library-imports`. + +Bug fixes +--------- + +- This release expands the ``future.moves`` package to include most of the remaining + modules that were moved in the standard library reorganization (PEP 3108). + (Issue #104) + +- This release also removes the broken ``--doctests_only`` option from the ``futurize`` + and ``pasteurize`` scripts for now. 
(Issue #103) + +Internal cleanups +----------------- + +The project folder structure has changed. Top-level packages are now in a +``src`` folder and the tests have been moved into a project-level ``tests`` +folder. + +The following deprecated internal modules have been removed (Issue #80): + +- ``future.utils.encoding`` and ``future.utils.six``. + +Deprecations +------------ + +The following internal functions have been deprecated and will be removed in a future release: + +- ``future.standard_library.scrub_py2_sys_modules`` +- ``future.standard_library.scrub_future_sys_modules`` + + +.. _whats-new-0.13.x: + +Changes in version 0.13.1 (2014-09-23) +====================================== + +This is a bug-fix release: + +- Fix (multiple) inheritance of ``future.builtins.object`` with metaclasses (Issues #91, #96) +- Fix ``futurize``'s refactoring of ``urllib`` imports (Issue #94) +- Fix ``futurize --all-imports`` (Issue #101) +- Fix ``futurize --output-dir`` logging (Issue #102) +- Doc formatting fix (Issues #98, #100) + + +Changes in version 0.13.0 (2014-08-13) +====================================== + +This is mostly a clean-up release. It adds some small new compatibility features +and fixes several bugs. + +Deprecations +------------ + +The following unused internal modules are now deprecated. They will be removed in a +future release: + +- ``future.utils.encoding`` and ``future.utils.six``. + +(Issue #80). See `here `_ +for the rationale for unbundling them. + + +New features +------------ + +- Docs: Add :ref:`compatible-idioms` from Ed Schofield's PyConAU 2014 talk. +- Add ``newint.to_bytes()`` and ``newint.from_bytes()``. (Issue #85) +- Add ``future.utils.raise_from`` as an equivalent to Py3's ``raise ... from + ...`` syntax. (Issue #86) +- Add ``past.builtins.oct()`` function. +- Add backports for Python 2.6 of ``subprocess.check_output()``, + ``itertools.combinations_with_replacement()``, and ``functools.cmp_to_key()``. 
+ +Bug fixes +--------- + +- Use a private logger instead of the global logger in + ``future.standard_library`` (Issue #82). This restores compatibility of the + standard library hooks with ``flask``. (Issue #79) +- Stage 1 of ``futurize`` no longer renames ``next`` methods to ``__next__`` + (Issue #81). It still converts ``obj.next()`` method calls to + ``next(obj)`` correctly. +- Prevent introduction of a second set of parentheses in ``print()`` calls in + some further cases. +- Fix ``isinstance`` checks for subclasses of future types. (Issue #89) +- Be explicit about encoding file contents as UTF-8 in unit tests. (Issue #63) + Useful for building RPMs and in other environments where ``LANG=C``. +- Fix for 3-argument ``pow(x, y, z)`` with ``newint`` arguments. (Thanks to @str4d.) + (Issue #87) + + +.. _whats-new-0.12.4: + +Changes in version 0.12.4 (2014-07-18) +====================================== + +- Fix upcasting behaviour of ``newint``. (Issue #76) + + +.. _whats-new-0.12.3: + +Changes in version 0.12.3 (2014-06-19) +====================================== + +- Add "official Python 3.4 support": Py3.4 is now listed among the PyPI Trove + classifiers and the tests now run successfully on Py3.4. (Issue #67) + +- Add backports of ``collections.OrderedDict`` and + ``collections.Counter`` for Python 2.6. (Issue #52) + +- Add ``--version`` option for ``futurize`` and ``pasteurize`` scripts. + (Issue #57) + +- Fix ``future.utils.ensure_new_type`` with ``long`` input. (Issue #65) + +- Remove some false alarms on checks for ambiguous fixer names with + ``futurize -f ...``. + +- Testing fixes: + - Don't hard-code Python interpreter command in tests. (Issue #62) + - Fix deprecated ``unittest`` usage in Py3. (Issue #62) + - Be explicit about encoding temporary file contents as UTF-8 for + when ``LANG=C`` (e.g., when building an RPM). (Issue #63) + - All undecorated tests are now passing again on Python 2.6, 2.7, 3.3, + and 3.4 (thanks to Elliott Sales de Andrade). 
+ +- Docs: + - Add list of fixers used by ``futurize``. (Issue #58) + - Add list of contributors to the Credits page. + +.. _whats-new-0.12.2: + +Changes in version 0.12.2 (2014-05-25) +====================================== + +- Add ``bytes.maketrans()`` method. (Issue #51) +- Add support for Python versions between 2.7.0 and 2.7.3 (inclusive). + (Issue #53) +- Bug fix for ``newlist(newlist([1, 2, 3]))``. (Issue #50) + + +.. _whats-new-0.12.1: + +Changes in version 0.12.1 (2014-05-14) +====================================== + +- Python 2.6 support: ``future.standard_library`` now isolates the ``importlib`` + dependency to one function (``import_``) so the ``importlib`` backport may + not be needed. + +- Doc updates + + +.. _whats-new-0.12: + +Changes in version 0.12.0 (2014-05-06) +====================================== + +The major new feature in this version is improvements in the support for the +reorganized standard library (PEP 3108) and compatibility of the import +mechanism with 3rd-party modules. + +More robust standard-library import hooks +----------------------------------------- + +**Note: backwards-incompatible change:** As previously announced (see +:ref:`deprecated-auto-import-hooks`), the import hooks must now be enabled +explicitly, as follows:: + + from future import standard_library + with standard_library.hooks(): + import html.parser + import http.client + ... + +This now causes these modules to be imported from ``future.moves``, a new +package that provides wrappers over the native Python 2 standard library with +the new Python 3 organization. As a consequence, the import hooks provided in +``future.standard_library`` are now fully compatible with the `Requests library +`_. + +The functional interface with ``install_hooks()`` is still supported for +backwards compatibility:: + + from future import standard_library + standard_library.install_hooks(): + + import html.parser + import http.client + ... 
+ standard_library.remove_hooks() + +Explicit installation of import hooks allows finer-grained control +over whether they are enabled for other imported modules that provide their own +Python 2/3 compatibility layer. This also improves compatibility of ``future`` +with tools like ``py2exe``. + + +``newobject`` base object defines fallback Py2-compatible special methods +------------------------------------------------------------------------- + +There is a new ``future.types.newobject`` base class (available as +``future.builtins.object``) that can streamline Py2/3 compatible code by +providing fallback Py2-compatible special methods for its subclasses. It +currently provides ``next()`` and ``__nonzero__()`` as fallback methods on Py2 +when its subclasses define the corresponding Py3-style ``__next__()`` and +``__bool__()`` methods. + +This obviates the need to add certain compatibility hacks or decorators to the +code such as the ``@implements_iterator`` decorator for classes that define a +Py3-style ``__next__`` method. + +In this example, the code defines a Py3-style iterator with a ``__next__`` +method. The ``object`` class defines a ``next`` method for Python 2 that maps +to ``__next__``:: + + from future.builtins import object + + class Upper(object): + def __init__(self, iterable): + self._iter = iter(iterable) + def __next__(self): # note the Py3 interface + return next(self._iter).upper() + def __iter__(self): + return self + + assert list(Upper('hello')) == list('HELLO') + +``newobject`` defines other Py2-compatible special methods similarly: +currently these include ``__nonzero__`` (mapped to ``__bool__``) and +``__long__`` (mapped to ``__int__``). + +Inheriting from ``newobject`` on Python 2 is safe even if your class defines +its own Python 2-style ``__nonzero__`` and ``next`` and ``__long__`` methods. +Your custom methods will simply override those on the base class. 
+ +On Python 3, as usual, ``future.builtins.object`` simply refers to ``builtins.object``. + + +``past.builtins`` module improved +--------------------------------- + +The ``past.builtins`` module is much more compatible with the corresponding +builtins on Python 2; many more of the Py2 unit tests pass on Py3. For example, +functions like ``map()`` and ``filter()`` now behave as they do on Py2 with with +``None`` as the first argument. + +The ``past.builtins`` module has also been extended to add Py3 support for +additional Py2 constructs that are not adequately handled by ``lib2to3`` (see +Issue #37). This includes new ``execfile()`` and ``cmp()`` functions. +``futurize`` now invokes imports of these functions from ``past.builtins``. + + +``surrogateescape`` error handler +--------------------------------- + +The ``newstr`` type (``future.builtins.str``) now supports a backport of the +Py3.x ``'surrogateescape'`` error handler for preserving high-bit +characters when encoding and decoding strings with unknown encodings. + + +``newlist`` type +---------------- + +There is a new ``list`` type in ``future.builtins`` that offers ``.copy()`` and +``.clear()`` methods like the ``list`` type in Python 3. + + +``listvalues`` and ``listitems`` +-------------------------------- + +``future.utils`` now contains helper functions ``listvalues`` and +``listitems``, which provide Python 2-style list snapshotting semantics for +dictionaries in both Python 2 and Python 3. + +These came out of the discussion around Nick Coghlan's now-withdrawn PEP 469. + +There is no corresponding ``listkeys(d)`` function; use ``list(d)`` instead. + + +Tests +----- + +The number of unit tests has increased from 600 to over 800. Most of the new +tests come from Python 3.3's test suite. 
+ + +Refactoring of ``future.standard_library.*`` -> ``future.backports`` +-------------------------------------------------------------------- + +The backported standard library modules have been moved to ``future.backports`` +to make the distinction clearer between these and the new ``future.moves`` +package. + + +Backported ``http.server`` and ``urllib`` modules +------------------------------------------------- -If you prefer, the following imports are also available directly:: +Alpha versions of backports of the ``http.server`` and ``urllib`` module from +Python 3.3's standard library are now provided in ``future.backports``. - >>> from future.standard_library import queue - >>> from future.standard_library import socketserver - >>> from future.standard_library.http.client import HTTPConnection +Use them like this:: + from future.backports.urllib.request import Request # etc. + from future.backports.http import server as http_server -As usual, this has no effect on Python 3. +Or with this new interface:: -*Note*: this is a backward-incompatible change. + from future.standard_library import import_, from_import -Simpler imports ---------------- + Request = from_import('urllib.request', 'Request', backport=True) + http = import_('http.server', backport=True) -It is now possible to import builtins directly from the ``future`` -namespace as follows:: +.. from future.standard_library.email import message_from_bytes # etc. +.. from future.standard_library.xmlrpc import client, server - >>> from future import * - -or just those you need:: - >>> from future import open, str +Internal refactoring +-------------------- +The ``future.builtins.types`` module has been moved to ``future.types``. +Likewise, ``past.builtins.types`` has been moved to ``past.types``. The only +user-visible effect of this is to change ``repr(type(obj))`` for instances +of these types. 
For example:: -Utility functions for raising exceptions with a traceback portably + >>> from future.builtins import bytes + >>> bytes(b'abc') + >>> type(b) + future.types.newbytes.newbytes + +Instead of:: + + >>> type(b) # prior to v0.12 + future.builtins.types.newbytes.newbytes + + +Bug fixes +--------- + +Many small improvements and fixes have been made across the project. Some highlights are: + +- Fixes and updates from Python 3.3.5 have been included in the backported + standard library modules. + +- Scrubbing of the ``sys.modules`` cache performed by ``remove_hooks()`` (also + called by the ``suspend_hooks`` and ``hooks`` context managers) is now more + conservative. + +.. Is this still true? +.. It now removes only modules with Py3 names (such as +.. ``urllib.parse``) and not the corresponding ``future.standard_library.*`` +.. modules (such as ``future.standard_library.urllib.parse``. + +- The ``fix_next`` and ``fix_reduce`` fixers have been moved to stage 1 of + ``futurize``. + +- ``futurize``: Shebang lines such as ``#!/usr/bin/env python`` and source code + file encoding declarations like ``# -*- coding=utf-8 -*-`` are no longer occasionally + displaced by ``from __future__ import ...`` statements. (Issue #10) + +- Improved compatibility with ``py2exe`` (`Issue #31 `_). + +- The ``future.utils.bytes_to_native_str`` function now returns a platform-native string + object and ``future.utils.native_str_to_bytes`` returns a ``newbytes`` object on Py2. + (`Issue #47 `_). + +- The backported ``http.client`` module and related modules use other new + backported modules such as ``email``. As a result they are more compliant + with the Python 3.3 equivalents. + + +.. _whats-new-0.11.4: + +Changes in version 0.11.4 (2014-05-25) +====================================== + +This release contains various small improvements and fixes: + +- This release restores Python 2.6 compatibility. 
(Issue #42) + +- The ``fix_absolute_import`` fixer now supports Cython ``.pyx`` modules. (Issue + #35) + +- Right-division with ``newint`` objects is fixed. (Issue #38) + +- The ``fix_dict`` fixer has been moved to stage2 of ``futurize``. + +- Calls to ``bytes(string, encoding[, errors])`` now work with ``encoding`` and + ``errors`` passed as positional arguments. Previously this only worked if + ``encoding`` and ``errors`` were passed as keyword arguments. + + +- The 0-argument ``super()`` function now works from inside static methods such + as ``__new__``. (Issue #36) + +- ``future.utils.native(d)`` calls now work for ``future.builtins.dict`` objects. + + +.. _whats-new-0.11.3: + +Changes in version 0.11.3 (2014-02-27) +====================================== + +This release has improvements in the standard library import hooks mechanism and +its compatibility with 3rd-party modules: + + +Improved compatibility with ``requests`` +---------------------------------------- + +The ``__exit__`` function of the ``hooks`` context manager and the +``remove_hooks`` function both now remove submodules of +``future.standard_library`` from the ``sys.modules`` cache. Therefore this code +is now possible on Python 2 and 3:: + + from future import standard_library + standard_library.install_hooks() + import http.client + standard_library.remove_hooks() + import requests + + data = requests.get('http://www.google.com') + + +Previously, this required manually removing ``http`` and ``http.client`` from +``sys.modules`` before importing ``requests`` on Python 2.x. (Issue #19) + +This change should also improve the compatibility of the standard library hooks +with any other module that provides its own Python 2/3 compatibility code. + +Note that the situation will improve further in version 0.12; import hooks will +require an explicit function call or the ``hooks`` context manager. 
+ + +Conversion scripts explicitly install import hooks +-------------------------------------------------- + +The ``futurize`` and ``pasteurize`` scripts now add an explicit call to +``install_hooks()`` to install the standard library import hooks. These scripts +now add these two lines:: + + from future import standard_library + standard_library.install_hooks() + +instead of just the first one. The next major version of ``future`` (0.12) will +require the explicit call or use of the ``hooks`` context manager. This will +allow finer-grained control over whether import hooks are enabled for other +imported modules, such as ``requests``, which provide their own Python 2/3 +compatibility code. + + +``futurize`` script no longer adds ``unicode_literals`` by default ------------------------------------------------------------------ -The functions ``raise_with_traceback()`` and ``raise_`` were added to -``future.utils`` to offer either the Python 3.x or Python 2.x behaviour -for raising exceptions. Thanks to Joel Tratner for the contribution of -these. +There is a new ``--unicode-literals`` flag to ``futurize`` that adds the +import:: + + from __future__ import unicode_literals + +to the top of each converted module. Without this flag, ``futurize`` now no +longer adds this import. (Issue #22) + +The ``pasteurize`` script for converting from Py3 to Py2/3 still adds +``unicode_literals``. (See the comments in Issue #22 for an explanation.) + + +.. _whats-new-0.11: + +Changes in version 0.11 (2014-01-28) +==================================== + +There are several major new features in version 0.11. + + +``past`` package +---------------- + +The python-future project now provides a ``past`` package in addition to the +``future`` package. Whereas ``future`` provides improved compatibility with +Python 3 code to Python 2, ``past`` provides support for using and interacting +with Python 2 code from Python 3. 
The structure reflects that of ``future``, +with ``past.builtins`` and ``past.utils``. There is also a new +``past.translation`` package that provides transparent translation of Python 2 +code to Python 3. (See below.) + +One purpose of ``past`` is to ease module-by-module upgrades to +codebases from Python 2. Another is to help with enabling Python 2 libraries to +support Python 3 without breaking the API they currently provide. (For example, +user code may expect these libraries to pass them Python 2's 8-bit strings, +rather than Python 3's ``bytes`` object.) A third purpose is to help migrate +projects to Python 3 even if one or more dependencies are still on Python 2. + +Currently ``past.builtins`` provides forward-ports of Python 2's ``str`` and +``dict`` objects, ``basestring``, and list-producing iterator functions. In +later releases, ``past.builtins`` will be used internally by the +``past.translation`` package to help with importing and using old Python 2 +modules in a Python 3 environment. -Deprecated ``isinstance`` replacement removed ---------------------------------------------- +Auto-translation of Python 2 modules upon import +------------------------------------------------ -``future`` v0.8.2 briefly introduced a replacement for the ``isinstance`` -builtin. This was then removed and its use was deprecated as of v0.9.0. -The alias for the builtin ``isinstance`` has now been removed from -``future.builtins``. +``past`` provides an experimental ``translation`` package to help +with importing and using old Python 2 modules in a Python 3 environment. +This is implemented using import hooks that attempt to automatically +translate Python 2 modules to Python 3 syntax and semantics upon import. Use +it like this:: -.. 
whats-new-0.10: + $ pip3 install plotrique==0.2.5-7 --no-compile # to ignore SyntaxErrors + $ python3 -What's new in version 0.10.x -============================ +Then pass in a whitelist of module name prefixes to the +``past.translation.autotranslate()`` function. Example:: + + >>> from past.translation import autotranslate + >>> autotranslate(['plotrique']) + >>> import plotrique + + +This is intended to help you migrate to Python 3 without the need for all +your code's dependencies to support Python 3 yet. It should be used as a +last resort; ideally Python 2-only dependencies should be ported +properly to a Python 2/3 compatible codebase using a tool like +``futurize`` and the changes should be pushed to the upstream project. + +For more information, see :ref:`translation`. + + +Separate ``pasteurize`` script +------------------------------ + +The functionality from ``futurize --from3`` is now in a separate script called +``pasteurize``. Use ``pasteurize`` when converting from Python 3 code to Python +2/3 compatible source. For more information, see :ref:`backwards-conversion`. + + +``pow()`` +--------- + +There is now a ``pow()`` function in ``future.builtins.misc`` that behaves like +the Python 3 ``pow()`` function when raising a negative number to a fractional +power (returning a complex number). + + +``input()`` no longer disabled globally on Py2 +---------------------------------------------- + +Previous versions of ``future`` deleted the ``input()`` function from +``__builtin__`` on Python 2 as a security measure. This was because +Python 2's ``input()`` function allows arbitrary code execution and could +present a security vulnerability on Python 2 if someone expects Python 3 +semantics but forgets to import ``input`` from ``future.builtins``. This +behaviour has been reverted, in the interests of broadening the +compatibility of ``future`` with other Python 2 modules. 
+ +Please remember to import ``input`` from ``future.builtins`` if you use +``input()`` in a Python 2/3 compatible codebase. + + +.. _deprecated-auto-import-hooks: + +Deprecated feature: auto-installation of standard-library import hooks +---------------------------------------------------------------------- + +Previous versions of ``python-future`` installed import hooks automatically upon +importing the ``standard_library`` module from ``future``. This has been +deprecated in order to improve robustness and compatibility with modules like +``requests`` that already perform their own single-source Python 2/3 +compatibility. + +As of v0.12, importing ``future.standard_library`` +will no longer install import hooks by default. Instead, please install the +import hooks explicitly as follows:: + + from future import standard_library + standard_library.install_hooks() + +And uninstall them after your import statements using:: + + standard_library.remove_hooks() + +*Note*: This is a backward-incompatible change. + + + +Internal changes +---------------- + +The internal ``future.builtins.backports`` module has been renamed to +``future.builtins.types``. This will change the ``repr`` of ``future`` +types but not their use. + + +.. _whats-new-0.10.2: + +Changes in version 0.10.2 (2014-01-11) +====================================== + +New context-manager interface to ``standard_library.hooks`` +----------------------------------------------------------- + +There is a new context manager ``future.standard_library.hooks``. Use it like +this:: + + from future import standard_library + with standard_library.hooks(): + import queue + import configserver + from http.client import HTTPConnection + # etc. + +If not using this context manager, it is now encouraged to add an explicit call to +``standard_library.install_hooks()`` as follows:: + + from future import standard_library + standard_library.install_hooks() + + import queue + import html + import http.client + # etc. 
+ +And to remove the hooks afterwards with:: + + standard_library.remove_hooks() + +The functions ``install_hooks()`` and ``remove_hooks()`` were previously +called ``enable_hooks()`` and ``disable_hooks()``. The old names are +deprecated (but are still available as aliases). + +As usual, this feature has no effect on Python 3. + + +.. _whats-new-0.10: + +Changes in version 0.10.0 (2013-12-02) +====================================== Backported ``dict`` type ------------------------ @@ -83,7 +981,7 @@ over large dictionaries. For example:: from __future__ import print_function from future.builtins import dict, range - + squares = dict({i: i**2 for i in range(10**7)}) assert not isinstance(d.items(), list) @@ -92,21 +990,34 @@ over large dictionaries. For example:: For more information, see :ref:`dict-object`. + +Utility functions ``raise_`` and ``exec_`` +------------------------------------------ + +The functions ``raise_with_traceback()`` and ``raise_()`` were +added to ``future.utils`` to offer either the Python 3.x or Python 2.x +behaviour for raising exceptions. Thanks to Joel Tratner for the +contribution of these. ``future.utils.reraise()`` is now deprecated. + +A portable ``exec_()`` function has been added to ``future.utils`` from +``six``. + + Bugfixes -------- -- Fixed newint.__divmod__ -- Improved robustness of :func:`disable_hooks` and :func:`enable_hooks()` in :mod:`future.standard_library` +- Fixed ``newint.__divmod__`` +- Improved robustness of installing and removing import hooks in :mod:`future.standard_library` - v0.10.1: Fixed broken ``pip install future`` on Py3 -.. whats-new-0.9: +.. 
_whats-new-0.9: -What's new in version 0.9.x -=========================== +Changes in version 0.9 (2013-11-06) +=================================== -``isinstance`` checks supported natively with backported types --------------------------------------------------------------- +``isinstance`` checks are supported natively with backported types +------------------------------------------------------------------ The ``isinstance`` function is no longer redefined in ``future.builtins`` to operate with the backported ``int``, ``bytes`` and ``str``. @@ -126,7 +1037,7 @@ imports deemed necessary. There is now an ``--all-imports`` option to the ``futurize`` script which gives the previous behaviour, which is to add all ``__future__`` imports and ``from future.builtins import *`` imports to every module. (This even -applies to an empty ``__init__.py`` file. +applies to an empty ``__init__.py`` file.) Looser type-checking for the backported ``str`` object @@ -141,8 +1052,8 @@ byte-strings and unicode strings, such as ``os.path.join`` in ``posixpath.py``. Python 3 when attempting to mix it with ``future.builtins.bytes``. -suspend_hooks() context manager added to ``future.standard_library`` --------------------------------------------------------------------- +``suspend_hooks()`` context manager added to ``future.standard_library`` +------------------------------------------------------------------------ Pychecker (as of v0.6.1)'s ``checker.py`` attempts to import the ``builtins`` module as a way of determining whether Python 3 is running. Since this @@ -159,10 +1070,10 @@ To work around this, ``future`` now provides a context manager called from pychecker.checker import Checker -.. whats-new-0.8: +.. _whats-new-0.8: -What's new in version 0.8 -========================= +Changes in version 0.8 (2013-10-28) +=================================== Python 2.6 support ------------------ @@ -189,13 +1100,13 @@ alongside each other easily if needed. 
The unused ``hacks`` module has also been removed from the source tree. -isinstance() added to :mod:`future.builtins` (v0.8.2) ------------------------------------------------------ +``isinstance()`` added to :mod:`future.builtins` (v0.8.2) +--------------------------------------------------------- -It is now possible to use ``isinstance()`` calls normally after importing ``isinstance`` from +It is now possible to use ``isinstance()`` calls normally after importing ``isinstance`` from ``future.builtins``. On Python 2, this is specially defined to be compatible with ``future``'s backported ``int``, ``str``, and ``bytes`` types, as well as -handling Python 2's int/long distinction. +handling Python 2's ``int``/``long`` distinction. The result is that code that uses ``isinstance`` to perform type-checking of ints, strings, and bytes should now work identically on Python 2 as on Python 3. @@ -205,11 +1116,83 @@ compatible type-checking across Python 2 and 3 in :mod:`future.utils` are now deprecated. -.. changelog: +.. _changelog: Summary of all changes ====================== +v0.15.0: + * Full backports of ``urllib.parse`` and other ``urllib`` submodules are exposed by ``install_aliases()``. + * ``tkinter.ttk`` support + * Initial ``surrogateescape`` support + * Additional backports: ``collections``, ``http`` constants, etc. + * Bug fixes + +v0.14.3: + * Bug fixes + +v0.14.2: + * Bug fixes + +v0.14.1: + * Bug fixes + +v0.14.0: + * New top-level ``builtins`` package on Py2 for cleaner imports. 
Equivalent to + ``future.builtins`` + * New top-level packages on Py2 with the same names as Py3 standard modules: + ``configparser``, ``copyreg``, ``html``, ``http``, ``xmlrpc``, ``winreg`` + +v0.13.1: + * Bug fixes + +v0.13.0: + * Cheat sheet for writing Python 2/3 compatible code + * ``to_int`` and ``from_int`` methods for ``newbytes`` + * Bug fixes + +v0.12.0: + * Add ``newobject`` and ``newlist`` types + * Improve compatibility of import hooks with ``Requests``, ``py2exe`` + * No more auto-installation of import hooks by ``future.standard_library`` + * New ``future.moves`` package + * ``past.builtins`` improved + * ``newstr.encode(..., errors='surrogateescape')`` supported + * Refactoring: ``future.standard_library`` submodules -> ``future.backports`` + * Refactoring: ``future.builtins.types`` -> ``future.types`` + * Refactoring: ``past.builtins.types`` -> ``past.types`` + * New ``listvalues`` and ``listitems`` functions in ``future.utils`` + * Many bug fixes to ``futurize``, ``future.builtins``, etc. + +v0.11.4: + * Restore Py2.6 compatibility + +v0.11.3: + * The ``futurize`` and ``pasteurize`` scripts add an explicit call to + ``future.standard_library.install_hooks()`` whenever modules affected by + PEP 3108 are imported. + + * The ``future.builtins.bytes`` constructor now accepts ``frozenset`` + objects as on Py3. + +v0.11.2: + * The ``past.translation.autotranslate`` feature now finds modules to import + more robustly and works with Python eggs. + +v0.11.1: + * Update to ``requirements_py26.txt`` for Python 2.6. Small updates to + docs and tests. + +v0.11: + * New ``past`` package with ``past.builtins`` and ``past.translation`` + modules. + +v0.10.2: + * Improvements to stdlib hooks. New context manager: + ``future.standard_library.hooks()``. + + * New ``raise_`` and ``raise_with_traceback`` functions in ``future.utils``. 
+ v0.10: * New backported ``dict`` object with set-like ``keys``, ``values``, ``items`` @@ -236,7 +1219,7 @@ v0.8.1: * Move a few more safe ``futurize`` fixes from stage2 to stage1 * Bug fixes to :mod:`future.utils` - + v0.8: * Added Python 2.6 support @@ -245,12 +1228,12 @@ v0.8: * Removed undocumented functions from :mod:`future.utils` v0.7: - * Added a backported Py3-like ``int`` object (inherits from long). + * Added a backported Py3-like ``int`` object (inherits from ``long``). * Added utility functions for type-checking and docs about ``isinstance`` uses/alternatives. - * Fixes and stricter type-checking for bytes and str objects + * Fixes and stricter type-checking for ``bytes`` and ``str`` objects * Added many more tests for the ``futurize`` script @@ -261,7 +1244,7 @@ v0.7: v0.6: * Added a backported Py3-like ``str`` object (inherits from Py2's ``unicode``) - * Removed support for the form ``from future import *``: use ``from future.builtins import *`` instead + * Removed support for the form ``from future import *``; use ``from future.builtins import *`` instead v0.5.3: * Doc improvements @@ -274,7 +1257,7 @@ v0.5.1: * :mod:`http.server` module backported - * bytes.split() and .rsplit() bugfixes + * ``bytes.split()`` and ``.rsplit()`` bugfixes v0.5.0: * Added backported Py3-like ``bytes`` object @@ -305,26 +1288,26 @@ v0.3.5: v0.3.4: * Added ``itertools.zip_longest`` - * Updated 2to3_backcompat tests to use futurize.py + * Updated ``2to3_backcompat`` tests to use ``futurize.py`` - * Improved libfuturize fixers: correct order of imports; add imports only when necessary (except absolute_import currently) + * Improved ``libfuturize`` fixers: correct order of imports; add imports only when necessary (except ``absolute_import`` currently) v0.3.3: * Added ``python-futurize`` console script * Added ``itertools.filterfalse`` - * Removed docs about unfinished backports (urllib etc.) + * Removed docs about unfinished backports (``urllib`` etc.) 
- * Removed old Py2 syntax in some files that breaks py3 setup.py install + * Removed old Py2 syntax in some files that breaks py3 ``setup.py install`` v0.3.2: - * Added test.support module + * Added ``test.support`` module - * Added UserList, UserString, UserDict classes to collections module + * Added ``UserList``, ``UserString``, ``UserDict`` classes to ``collections`` module * Removed ``int`` -> ``long`` mapping - + * Added backported ``_markupbase.py`` etc. with new-style classes to fix travis-ci build problems * Added working ``html`` and ``http.client`` backported modules @@ -346,7 +1329,7 @@ v0.2.1: * Small bug fixes v0.2.0: - * Features module renamed to modified_builtins + * ``Features`` module renamed to ``modified_builtins`` * New functions added: :func:`round`, :func:`input` @@ -357,7 +1340,7 @@ v0.2.0: should have no effect on Python 3. On Python 2, it only shadows the builtins; it doesn't introduce any new names. - * End-to-end tests with Python 2 code and 2to3 now work + * End-to-end tests with Python 2 code and ``2to3`` now work v0.1.0: * first version with tests! diff --git a/docs/compatible_idioms.rst b/docs/compatible_idioms.rst new file mode 100644 index 00000000..ab478ed8 --- /dev/null +++ b/docs/compatible_idioms.rst @@ -0,0 +1,1457 @@ +.. _compatible-idioms: + +Cheat Sheet: Writing Python 2-3 compatible code +=============================================== + +- **Copyright (c):** 2013-2024 Python Charmers, Australia. +- **Author:** Ed Schofield. +- **Licence:** Creative Commons Attribution. + +A PDF version is here: https://python-future.org/compatible\_idioms.pdf + +This notebook shows you idioms for writing future-proof code that is +compatible with both versions of Python: 2 and 3. It accompanies Ed +Schofield's talk at PyCon AU 2014, "Writing 2/3 compatible code". (The +video is here: https://www.youtube.com/watch?v=KOqk8j11aAI&t=10m14s.) 
+ +Minimum versions: + +- Python 2: 2.7+ +- Python 3: 3.4+ + +Setup +----- + +The imports below refer to these ``pip``-installable packages on PyPI: + +:: + + import future # pip install future + import builtins # pip install future + import past # pip install future + import six # pip install six + +The following scripts are also ``pip``-installable: + +:: + + futurize # pip install future + pasteurize # pip install future + +See https://python-future.org and https://pythonhosted.org/six/ for more +information. + +Essential syntax differences +---------------------------- + +print +~~~~~ + +.. code:: python + + # Python 2 only: + print 'Hello' +.. code:: python + + # Python 2 and 3: + print('Hello') +To print multiple strings, import ``print_function`` to prevent Py2 from +interpreting it as a tuple: + +.. code:: python + + # Python 2 only: + print 'Hello', 'Guido' +.. code:: python + + # Python 2 and 3: + from __future__ import print_function # (at top of module) + + print('Hello', 'Guido') +.. code:: python + + # Python 2 only: + print >> sys.stderr, 'Hello' +.. code:: python + + # Python 2 and 3: + from __future__ import print_function + + print('Hello', file=sys.stderr) +.. code:: python + + # Python 2 only: + print 'Hello', +.. code:: python + + # Python 2 and 3: + from __future__ import print_function + + print('Hello', end='') +Raising exceptions +~~~~~~~~~~~~~~~~~~ + +.. code:: python + + # Python 2 only: + raise ValueError, "dodgy value" +.. code:: python + + # Python 2 and 3: + raise ValueError("dodgy value") +Raising exceptions with a traceback: + +.. code:: python + + # Python 2 only: + traceback = sys.exc_info()[2] + raise ValueError, "dodgy value", traceback +.. code:: python + + # Python 3 only: + raise ValueError("dodgy value").with_traceback() +.. 
code:: python + + # Python 2 and 3: option 1 + from six import reraise as raise_ + # or + from future.utils import raise_ + + traceback = sys.exc_info()[2] + raise_(ValueError, "dodgy value", traceback) +.. code:: python + + # Python 2 and 3: option 2 + from future.utils import raise_with_traceback + + raise_with_traceback(ValueError("dodgy value")) +Exception chaining (PEP 3134): + +.. code:: python + + # Setup: + class DatabaseError(Exception): + pass +.. code:: python + + # Python 3 only + class FileDatabase: + def __init__(self, filename): + try: + self.file = open(filename) + except IOError as exc: + raise DatabaseError('failed to open') from exc +.. code:: python + + # Python 2 and 3: + from future.utils import raise_from + + class FileDatabase: + def __init__(self, filename): + try: + self.file = open(filename) + except IOError as exc: + raise_from(DatabaseError('failed to open'), exc) +.. code:: python + + # Testing the above: + try: + fd = FileDatabase('non_existent_file.txt') + except Exception as e: + assert isinstance(e.__cause__, IOError) # FileNotFoundError on Py3.3+ inherits from IOError +Catching exceptions +~~~~~~~~~~~~~~~~~~~ + +.. code:: python + + # Python 2 only: + try: + ... + except ValueError, e: + ... +.. code:: python + + # Python 2 and 3: + try: + ... + except ValueError as e: + ... +Division +~~~~~~~~ + +Integer division (rounding down): + +.. code:: python + + # Python 2 only: + assert 2 / 3 == 0 +.. code:: python + + # Python 2 and 3: + assert 2 // 3 == 0 +"True division" (float division): + +.. code:: python + + # Python 3 only: + assert 3 / 2 == 1.5 +.. code:: python + + # Python 2 and 3: + from __future__ import division # (at top of module) + + assert 3 / 2 == 1.5 +"Old division" (i.e. compatible with Py2 behaviour): + +.. code:: python + + # Python 2 only: + a = b / c # with any types +.. 
code:: python + + # Python 2 and 3: + from past.utils import old_div + + a = old_div(b, c) # always same as / on Py2 +Long integers +~~~~~~~~~~~~~ + +Short integers are gone in Python 3 and ``long`` has become ``int`` +(without the trailing ``L`` in the ``repr``). + +.. code:: python + + # Python 2 only + k = 9223372036854775808L + + # Python 2 and 3: + k = 9223372036854775808 +.. code:: python + + # Python 2 only + bigint = 1L + + # Python 2 and 3 + from builtins import int + bigint = int(1) +To test whether a value is an integer (of any kind): + +.. code:: python + + # Python 2 only: + if isinstance(x, (int, long)): + ... + + # Python 3 only: + if isinstance(x, int): + ... + + # Python 2 and 3: option 1 + from builtins import int # subclass of long on Py2 + + if isinstance(x, int): # matches both int and long on Py2 + ... + + # Python 2 and 3: option 2 + from past.builtins import long + + if isinstance(x, (int, long)): + ... +Octal constants +~~~~~~~~~~~~~~~ + +.. code:: python + + 0644 # Python 2 only +.. code:: python + + 0o644 # Python 2 and 3 +Backtick repr +~~~~~~~~~~~~~ + +.. code:: python + + `x` # Python 2 only +.. code:: python + + repr(x) # Python 2 and 3 +Metaclasses +~~~~~~~~~~~ + +.. code:: python + + class BaseForm(object): + pass + + class FormType(type): + pass +.. code:: python + + # Python 2 only: + class Form(BaseForm): + __metaclass__ = FormType + pass +.. code:: python + + # Python 3 only: + class Form(BaseForm, metaclass=FormType): + pass +.. code:: python + + # Python 2 and 3: + from six import with_metaclass + # or + from future.utils import with_metaclass + + class Form(with_metaclass(FormType, BaseForm)): + pass +Strings and bytes +----------------- + +Unicode (text) string literals +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you are upgrading an existing Python 2 codebase, it may be preferable +to mark up all string literals as unicode explicitly with ``u`` +prefixes: + +.. 
code:: python + + # Python 2 only + s1 = 'The Zen of Python' + s2 = u'きたないのよりきれいな方がいい\n' + + # Python 2 and 3 + s1 = u'The Zen of Python' + s2 = u'きたないのよりきれいな方がいい\n' +The ``futurize`` and ``python-modernize`` tools do not currently offer +an option to do this automatically. + +If you are writing code for a new project or new codebase, you can use +this idiom to make all string literals in a module unicode strings: + +.. code:: python + + # Python 2 and 3 + from __future__ import unicode_literals # at top of module + + s1 = 'The Zen of Python' + s2 = 'きたないのよりきれいな方がいい\n' +See https://python-future.org/unicode\_literals.html for more discussion +on which style to use. + +Byte-string literals +~~~~~~~~~~~~~~~~~~~~ + +.. code:: python + + # Python 2 only + s = 'This must be a byte-string' + + # Python 2 and 3 + s = b'This must be a byte-string' +To loop over a byte-string with possible high-bit characters, obtaining +each character as a byte-string of length 1: + +.. code:: python + + # Python 2 only: + for bytechar in 'byte-string with high-bit chars like \xf9': + ... + + # Python 3 only: + for myint in b'byte-string with high-bit chars like \xf9': + bytechar = bytes([myint]) + + # Python 2 and 3: + from builtins import bytes + for myint in bytes(b'byte-string with high-bit chars like \xf9'): + bytechar = bytes([myint]) +As an alternative, ``chr()`` and ``.encode('latin-1')`` can be used to +convert an int into a 1-char byte string: + +.. code:: python + + # Python 3 only: + for myint in b'byte-string with high-bit chars like \xf9': + char = chr(myint) # returns a unicode string + bytechar = char.encode('latin-1') + + # Python 2 and 3: + from builtins import bytes, chr + for myint in bytes(b'byte-string with high-bit chars like \xf9'): + char = chr(myint) # returns a unicode string + bytechar = char.encode('latin-1') # forces returning a byte str +basestring +~~~~~~~~~~ + +.. 
code:: python + + # Python 2 only: + a = u'abc' + b = 'def' + assert (isinstance(a, basestring) and isinstance(b, basestring)) + + # Python 2 and 3: alternative 1 + from past.builtins import basestring # pip install future + + a = u'abc' + b = b'def' + assert (isinstance(a, basestring) and isinstance(b, basestring)) +.. code:: python + + # Python 2 and 3: alternative 2: refactor the code to avoid considering + # byte-strings as strings. + + from builtins import str + a = u'abc' + b = b'def' + c = b.decode() + assert isinstance(a, str) and isinstance(c, str) + # ... +unicode +~~~~~~~ + +.. code:: python + + # Python 2 only: + templates = [u"blog/blog_post_detail_%s.html" % unicode(slug)] +.. code:: python + + # Python 2 and 3: alternative 1 + from builtins import str + templates = [u"blog/blog_post_detail_%s.html" % str(slug)] +.. code:: python + + # Python 2 and 3: alternative 2 + from builtins import str as text + templates = [u"blog/blog_post_detail_%s.html" % text(slug)] +StringIO +~~~~~~~~ + +.. code:: python + + # Python 2 only: + from StringIO import StringIO + # or: + from cStringIO import StringIO + + # Python 2 and 3: + from io import BytesIO # for handling byte strings + from io import StringIO # for handling unicode strings +Imports relative to a package +----------------------------- + +Suppose the package is: + +:: + + mypackage/ + __init__.py + submodule1.py + submodule2.py + + +and the code below is in ``submodule1.py``: + +.. code:: python + + # Python 2 only: + import submodule2 +.. code:: python + + # Python 2 and 3: + from . import submodule2 +.. code:: python + + # Python 2 and 3: + # To make Py2 code safer (more like Py3) by preventing + # implicit relative imports, you can also add this to the top: + from __future__ import absolute_import +Dictionaries +------------ + +.. 
code:: python + + heights = {'Fred': 175, 'Anne': 166, 'Joe': 192} +Iterating through ``dict`` keys/values/items +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Iterable dict keys: + +.. code:: python + + # Python 2 only: + for key in heights.iterkeys(): + ... +.. code:: python + + # Python 2 and 3: + for key in heights: + ... +Iterable dict values: + +.. code:: python + + # Python 2 only: + for value in heights.itervalues(): + ... +.. code:: python + + # Idiomatic Python 3 + for value in heights.values(): # extra memory overhead on Py2 + ... +.. code:: python + + # Python 2 and 3: option 1 + from builtins import dict + + heights = dict(Fred=175, Anne=166, Joe=192) + for key in heights.values(): # efficient on Py2 and Py3 + ... +.. code:: python + + # Python 2 and 3: option 2 + from future.utils import itervalues + # or + from six import itervalues + + for key in itervalues(heights): + ... +Iterable dict items: + +.. code:: python + + # Python 2 only: + for (key, value) in heights.iteritems(): + ... +.. code:: python + + # Python 2 and 3: option 1 + for (key, value) in heights.items(): # inefficient on Py2 + ... +.. code:: python + + # Python 2 and 3: option 2 + from future.utils import viewitems + + for (key, value) in viewitems(heights): # also behaves like a set + ... +.. code:: python + + # Python 2 and 3: option 3 + from future.utils import iteritems + # or + from six import iteritems + + for (key, value) in iteritems(heights): + ... +dict keys/values/items as a list +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +dict keys as a list: + +.. code:: python + + # Python 2 only: + keylist = heights.keys() + assert isinstance(keylist, list) +.. code:: python + + # Python 2 and 3: + keylist = list(heights) + assert isinstance(keylist, list) +dict values as a list: + +.. code:: python + + # Python 2 only: + heights = {'Fred': 175, 'Anne': 166, 'Joe': 192} + valuelist = heights.values() + assert isinstance(valuelist, list) +.. 
code:: python + + # Python 2 and 3: option 1 + valuelist = list(heights.values()) # inefficient on Py2 +.. code:: python + + # Python 2 and 3: option 2 + from builtins import dict + + heights = dict(Fred=175, Anne=166, Joe=192) + valuelist = list(heights.values()) +.. code:: python + + # Python 2 and 3: option 3 + from future.utils import listvalues + + valuelist = listvalues(heights) +.. code:: python + + # Python 2 and 3: option 4 + from future.utils import itervalues + # or + from six import itervalues + + valuelist = list(itervalues(heights)) +dict items as a list: + +.. code:: python + + # Python 2 and 3: option 1 + itemlist = list(heights.items()) # inefficient on Py2 +.. code:: python + + # Python 2 and 3: option 2 + from future.utils import listitems + + itemlist = listitems(heights) +.. code:: python + + # Python 2 and 3: option 3 + from future.utils import iteritems + # or + from six import iteritems + + itemlist = list(iteritems(heights)) +Custom class behaviour +---------------------- + +Custom iterators +~~~~~~~~~~~~~~~~ + +.. code:: python + + # Python 2 only + class Upper(object): + def __init__(self, iterable): + self._iter = iter(iterable) + def next(self): # Py2-style + return self._iter.next().upper() + def __iter__(self): + return self + + itr = Upper('hello') + assert itr.next() == 'H' # Py2-style + assert list(itr) == list('ELLO') +.. code:: python + + # Python 2 and 3: option 1 + from builtins import object + + class Upper(object): + def __init__(self, iterable): + self._iter = iter(iterable) + def __next__(self): # Py3-style iterator interface + return next(self._iter).upper() # builtin next() function calls + def __iter__(self): + return self + + itr = Upper('hello') + assert next(itr) == 'H' # compatible style + assert list(itr) == list('ELLO') +.. 
code:: python + + # Python 2 and 3: option 2 + from future.utils import implements_iterator + + @implements_iterator + class Upper(object): + def __init__(self, iterable): + self._iter = iter(iterable) + def __next__(self): # Py3-style iterator interface + return next(self._iter).upper() # builtin next() function calls + def __iter__(self): + return self + + itr = Upper('hello') + assert next(itr) == 'H' + assert list(itr) == list('ELLO') +Custom ``__str__`` methods +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: python + + # Python 2 only: + class MyClass(object): + def __unicode__(self): + return 'Unicode string: \u5b54\u5b50' + def __str__(self): + return unicode(self).encode('utf-8') + + a = MyClass() + print(a) # prints encoded string +.. code:: python + + # Python 2 and 3: + from future.utils import python_2_unicode_compatible + + @python_2_unicode_compatible + class MyClass(object): + def __str__(self): + return u'Unicode string: \u5b54\u5b50' + + a = MyClass() + print(a) # prints string encoded as utf-8 on Py2 + +.. parsed-literal:: + + Unicode string: 孔子 + + +Custom ``__nonzero__`` vs ``__bool__`` method: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: python + + # Python 2 only: + class AllOrNothing(object): + def __init__(self, l): + self.l = l + def __nonzero__(self): + return all(self.l) + + container = AllOrNothing([0, 100, 200]) + assert not bool(container) +.. code:: python + + # Python 2 and 3: + from builtins import object + + class AllOrNothing(object): + def __init__(self, l): + self.l = l + def __bool__(self): + return all(self.l) + + container = AllOrNothing([0, 100, 200]) + assert not bool(container) +Lists versus iterators +---------------------- + +xrange +~~~~~~ + +.. code:: python + + # Python 2 only: + for i in xrange(10**8): + ... +.. code:: python + + # Python 2 and 3: forward-compatible + from builtins import range + for i in range(10**8): + ... +.. 
code:: python + + # Python 2 and 3: backward-compatible + from past.builtins import xrange + for i in xrange(10**8): + ... +range +~~~~~ + +.. code:: python + + # Python 2 only + mylist = range(5) + assert mylist == [0, 1, 2, 3, 4] +.. code:: python + + # Python 2 and 3: forward-compatible: option 1 + mylist = list(range(5)) # copies memory on Py2 + assert mylist == [0, 1, 2, 3, 4] +.. code:: python + + # Python 2 and 3: forward-compatible: option 2 + from builtins import range + + mylist = list(range(5)) + assert mylist == [0, 1, 2, 3, 4] +.. code:: python + + # Python 2 and 3: option 3 + from future.utils import lrange + + mylist = lrange(5) + assert mylist == [0, 1, 2, 3, 4] +.. code:: python + + # Python 2 and 3: backward compatible + from past.builtins import range + + mylist = range(5) + assert mylist == [0, 1, 2, 3, 4] +map +~~~ + +.. code:: python + + # Python 2 only: + mynewlist = map(f, myoldlist) + assert mynewlist == [f(x) for x in myoldlist] +.. code:: python + + # Python 2 and 3: option 1 + # Idiomatic Py3, but inefficient on Py2 + mynewlist = list(map(f, myoldlist)) + assert mynewlist == [f(x) for x in myoldlist] +.. code:: python + + # Python 2 and 3: option 2 + from builtins import map + + mynewlist = list(map(f, myoldlist)) + assert mynewlist == [f(x) for x in myoldlist] +.. code:: python + + # Python 2 and 3: option 3 + try: + import itertools.imap as map + except ImportError: + pass + + mynewlist = list(map(f, myoldlist)) # inefficient on Py2 + assert mynewlist == [f(x) for x in myoldlist] +.. code:: python + + # Python 2 and 3: option 4 + from future.utils import lmap + + mynewlist = lmap(f, myoldlist) + assert mynewlist == [f(x) for x in myoldlist] +.. code:: python + + # Python 2 and 3: option 5 + from past.builtins import map + + mynewlist = map(f, myoldlist) + assert mynewlist == [f(x) for x in myoldlist] +imap +~~~~ + +.. 
code:: python + + # Python 2 only: + from itertools import imap + + myiter = imap(func, myoldlist) + assert isinstance(myiter, iter) +.. code:: python + + # Python 3 only: + myiter = map(func, myoldlist) + assert isinstance(myiter, iter) +.. code:: python + + # Python 2 and 3: option 1 + from builtins import map + + myiter = map(func, myoldlist) + assert isinstance(myiter, iter) +.. code:: python + + # Python 2 and 3: option 2 + try: + import itertools.imap as map + except ImportError: + pass + + myiter = map(func, myoldlist) + assert isinstance(myiter, iter) +.. code:: python + + # Python 2 and 3: option 3 + from six.moves import map + + myiter = map(func, myoldlist) + assert isinstance(myiter, iter) + +zip, izip +~~~~~~~~~ + +As above with ``zip`` and ``itertools.izip``. + +filter, ifilter +~~~~~~~~~~~~~~~ + +As above with ``filter`` and ``itertools.ifilter`` too. + +Other builtins +-------------- + +File IO with open() +~~~~~~~~~~~~~~~~~~~ + +.. code:: python + + # Python 2 only + f = open('myfile.txt') + data = f.read() # as a byte string + text = data.decode('utf-8') + + # Python 2 and 3: alternative 1 + from io import open + f = open('myfile.txt', 'rb') + data = f.read() # as bytes + text = data.decode('utf-8') # unicode, not bytes + + # Python 2 and 3: alternative 2 + from io import open + f = open('myfile.txt', encoding='utf-8') + text = f.read() # unicode, not bytes +reduce() +~~~~~~~~ + +.. code:: python + + # Python 2 only: + assert reduce(lambda x, y: x+y, [1, 2, 3, 4, 5]) == 1+2+3+4+5 +.. code:: python + + # Python 2 and 3: + from functools import reduce + + assert reduce(lambda x, y: x+y, [1, 2, 3, 4, 5]) == 1+2+3+4+5 +raw\_input() +~~~~~~~~~~~~ + +.. code:: python + + # Python 2 only: + name = raw_input('What is your name? ') + assert isinstance(name, str) # native str +.. code:: python + + # Python 2 and 3: + from builtins import input + + name = input('What is your name? 
') + assert isinstance(name, str) # native str on Py2 and Py3 +input() +~~~~~~~ + +.. code:: python + + # Python 2 only: + input("Type something safe please: ") +.. code:: python + + # Python 2 and 3 + from builtins import input + eval(input("Type something safe please: ")) +Warning: using either of these is **unsafe** with untrusted input. + +file() +~~~~~~ + +.. code:: python + + # Python 2 only: + f = file(pathname) +.. code:: python + + # Python 2 and 3: + f = open(pathname) + + # But preferably, use this: + from io import open + f = open(pathname, 'rb') # if f.read() should return bytes + # or + f = open(pathname, 'rt') # if f.read() should return unicode text +exec +~~~~ + +.. code:: python + + # Python 2 only: + exec 'x = 10' + + # Python 2 and 3: + exec('x = 10') +.. code:: python + + # Python 2 only: + g = globals() + exec 'x = 10' in g + + # Python 2 and 3: + g = globals() + exec('x = 10', g) +.. code:: python + + # Python 2 only: + l = locals() + exec 'x = 10' in g, l + + # Python 2 and 3: + exec('x = 10', g, l) +execfile() +~~~~~~~~~~ + +.. code:: python + + # Python 2 only: + execfile('myfile.py') +.. code:: python + + # Python 2 and 3: alternative 1 + from past.builtins import execfile + + execfile('myfile.py') +.. code:: python + + # Python 2 and 3: alternative 2 + exec(compile(open('myfile.py').read(), 'myfile.py', 'exec')) + + # This can sometimes cause this: + # SyntaxError: function ... uses import * and bare exec ... + # See https://github.com/PythonCharmers/python-future/issues/37 +unichr() +~~~~~~~~ + +.. code:: python + + # Python 2 only: + assert unichr(8364) == '€' +.. code:: python + + # Python 3 only: + assert chr(8364) == '€' +.. code:: python + + # Python 2 and 3: + from builtins import chr + assert chr(8364) == '€' +intern() +~~~~~~~~ + +.. code:: python + + # Python 2 only: + intern('mystring') +.. code:: python + + # Python 3 only: + from sys import intern + intern('mystring') +.. 
code:: python + + # Python 2 and 3: alternative 1 + from past.builtins import intern + intern('mystring') +.. code:: python + + # Python 2 and 3: alternative 2 + from six.moves import intern + intern('mystring') +.. code:: python + + # Python 2 and 3: alternative 3 + from future.standard_library import install_aliases + install_aliases() + from sys import intern + intern('mystring') +.. code:: python + + # Python 2 and 3: alternative 4 + try: + from sys import intern + except ImportError: + pass + intern('mystring') +apply() +~~~~~~~ + +.. code:: python + + args = ('a', 'b') + kwargs = {'kwarg1': True} +.. code:: python + + # Python 2 only: + apply(f, args, kwargs) +.. code:: python + + # Python 2 and 3: alternative 1 + f(*args, **kwargs) +.. code:: python + + # Python 2 and 3: alternative 2 + from past.builtins import apply + apply(f, args, kwargs) +chr() +~~~~~ + +.. code:: python + + # Python 2 only: + assert chr(64) == b'@' + assert chr(200) == b'\xc8' +.. code:: python + + # Python 3 only: option 1 + assert chr(64).encode('latin-1') == b'@' + assert chr(0xc8).encode('latin-1') == b'\xc8' +.. code:: python + + # Python 2 and 3: option 1 + from builtins import chr + + assert chr(64).encode('latin-1') == b'@' + assert chr(0xc8).encode('latin-1') == b'\xc8' +.. code:: python + + # Python 3 only: option 2 + assert bytes([64]) == b'@' + assert bytes([0xc8]) == b'\xc8' +.. code:: python + + # Python 2 and 3: option 2 + from builtins import bytes + + assert bytes([64]) == b'@' + assert bytes([0xc8]) == b'\xc8' +cmp() +~~~~~ + +.. code:: python + + # Python 2 only: + assert cmp('a', 'b') < 0 and cmp('b', 'a') > 0 and cmp('c', 'c') == 0 +.. code:: python + + # Python 2 and 3: alternative 1 + from past.builtins import cmp + assert cmp('a', 'b') < 0 and cmp('b', 'a') > 0 and cmp('c', 'c') == 0 +.. 
code:: python + + # Python 2 and 3: alternative 2 + cmp = lambda x, y: (x > y) - (x < y) + assert cmp('a', 'b') < 0 and cmp('b', 'a') > 0 and cmp('c', 'c') == 0 +reload() +~~~~~~~~ + +.. code:: python + + # Python 2 only: + reload(mymodule) +.. code:: python + + # Python 2 and 3 + from imp import reload + reload(mymodule) +Standard library +---------------- + +dbm modules +~~~~~~~~~~~ + +.. code:: python + + # Python 2 only + import anydbm + import whichdb + import dbm + import dumbdbm + import gdbm + + # Python 2 and 3: alternative 1 + from future import standard_library + standard_library.install_aliases() + + import dbm + import dbm.ndbm + import dbm.dumb + import dbm.gnu + + # Python 2 and 3: alternative 2 + from future.moves import dbm + from future.moves.dbm import dumb + from future.moves.dbm import ndbm + from future.moves.dbm import gnu + + # Python 2 and 3: alternative 3 + from six.moves import dbm_gnu + # (others not supported) +commands / subprocess modules +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: python + + # Python 2 only + from commands import getoutput, getstatusoutput + + # Python 2 and 3 + from future import standard_library + standard_library.install_aliases() + + from subprocess import getoutput, getstatusoutput +StringIO module +~~~~~~~~~~~~~~~ + +.. code:: python + + # Python 2 only + from StringIO import StringIO + from cStringIO import StringIO +.. code:: python + + # Python 2 and 3 + from io import BytesIO + # and refactor StringIO() calls to BytesIO() if passing byte-strings +http module +~~~~~~~~~~~ + +.. code:: python + + # Python 2 only: + import httplib + import Cookie + import cookielib + import BaseHTTPServer + import SimpleHTTPServer + import CGIHttpServer + + # Python 2 and 3 (after ``pip install future``): + import http.client + import http.cookies + import http.cookiejar + import http.server +xmlrpc module +~~~~~~~~~~~~~ + +.. 
code:: python + + # Python 2 only: + import DocXMLRPCServer + import SimpleXMLRPCServer + + # Python 2 and 3 (after ``pip install future``): + import xmlrpc.server +.. code:: python + + # Python 2 only: + import xmlrpclib + + # Python 2 and 3 (after ``pip install future``): + import xmlrpc.client +html escaping and entities +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: python + + # Python 2 and 3: + from cgi import escape + + # Safer (Python 2 and 3, after ``pip install future``): + from html import escape + + # Python 2 only: + from htmlentitydefs import codepoint2name, entitydefs, name2codepoint + + # Python 2 and 3 (after ``pip install future``): + from html.entities import codepoint2name, entitydefs, name2codepoint +html parsing +~~~~~~~~~~~~ + +.. code:: python + + # Python 2 only: + from HTMLParser import HTMLParser + + # Python 2 and 3 (after ``pip install future``) + from html.parser import HTMLParser + + # Python 2 and 3 (alternative 2): + from future.moves.html.parser import HTMLParser +urllib module +~~~~~~~~~~~~~ + +``urllib`` is the hardest module to use from Python 2/3 compatible code. +You might want to switch to Requests (http://python-requests.org) instead. + +.. code:: python + + # Python 2 only: + from urlparse import urlparse + from urllib import urlencode + from urllib2 import urlopen, Request, HTTPError +.. code:: python + + # Python 3 only: + from urllib.parse import urlparse, urlencode + from urllib.request import urlopen, Request + from urllib.error import HTTPError +.. code:: python + + # Python 2 and 3: easiest option + from future.standard_library import install_aliases + install_aliases() + + from urllib.parse import urlparse, urlencode + from urllib.request import urlopen, Request + from urllib.error import HTTPError +.. 
code:: python + + # Python 2 and 3: alternative 2 + from future.standard_library import hooks + + with hooks(): + from urllib.parse import urlparse, urlencode + from urllib.request import urlopen, Request + from urllib.error import HTTPError +.. code:: python + + # Python 2 and 3: alternative 3 + from future.moves.urllib.parse import urlparse, urlencode + from future.moves.urllib.request import urlopen, Request + from future.moves.urllib.error import HTTPError + # or + from six.moves.urllib.parse import urlparse, urlencode + from six.moves.urllib.request import urlopen + from six.moves.urllib.error import HTTPError +.. code:: python + + # Python 2 and 3: alternative 4 + try: + from urllib.parse import urlparse, urlencode + from urllib.request import urlopen, Request + from urllib.error import HTTPError + except ImportError: + from urlparse import urlparse + from urllib import urlencode + from urllib2 import urlopen, Request, HTTPError +Tkinter +~~~~~~~ + +.. code:: python + + # Python 2 only: + import Tkinter + import Dialog + import FileDialog + import ScrolledText + import SimpleDialog + import Tix + import Tkconstants + import Tkdnd + import tkColorChooser + import tkCommonDialog + import tkFileDialog + import tkFont + import tkMessageBox + import tkSimpleDialog + import ttk + + # Python 2 and 3 (after ``pip install future``): + import tkinter + import tkinter.dialog + import tkinter.filedialog + import tkinter.scrolledtext + import tkinter.simpledialog + import tkinter.tix + import tkinter.constants + import tkinter.dnd + import tkinter.colorchooser + import tkinter.commondialog + import tkinter.filedialog + import tkinter.font + import tkinter.messagebox + import tkinter.simpledialog + import tkinter.ttk +socketserver +~~~~~~~~~~~~ + +.. code:: python + + # Python 2 only: + import SocketServer + + # Python 2 and 3 (after ``pip install future``): + import socketserver +copy\_reg, copyreg +~~~~~~~~~~~~~~~~~~ + +.. 
code:: python + + # Python 2 only: + import copy_reg + + # Python 2 and 3 (after ``pip install future``): + import copyreg +configparser +~~~~~~~~~~~~ + +.. code:: python + + # Python 2 only: + from ConfigParser import ConfigParser + + # Python 2 and 3 (after ``pip install configparser``): + from configparser import ConfigParser +queue +~~~~~ + +.. code:: python + + # Python 2 only: + from Queue import Queue, heapq, deque + + # Python 2 and 3 (after ``pip install future``): + from queue import Queue, heapq, deque +repr, reprlib +~~~~~~~~~~~~~ + +.. code:: python + + # Python 2 only: + from repr import aRepr, repr + + # Python 2 and 3 (after ``pip install future``): + from reprlib import aRepr, repr +UserDict, UserList, UserString +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: python + + # Python 2 only: + from UserDict import UserDict + from UserList import UserList + from UserString import UserString + + # Python 3 only: + from collections import UserDict, UserList, UserString + + # Python 2 and 3: alternative 1 + from future.moves.collections import UserDict, UserList, UserString + + # Python 2 and 3: alternative 2 + from six.moves import UserDict, UserList, UserString + + # Python 2 and 3: alternative 3 + from future.standard_library import install_aliases + install_aliases() + from collections import UserDict, UserList, UserString +itertools: filterfalse, zip\_longest +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
code:: python + + # Python 2 only: + from itertools import ifilterfalse, izip_longest + + # Python 3 only: + from itertools import filterfalse, zip_longest + + # Python 2 and 3: alternative 1 + from future.moves.itertools import filterfalse, zip_longest + + # Python 2 and 3: alternative 2 + from six.moves import filterfalse, zip_longest + + # Python 2 and 3: alternative 3 + from future.standard_library import install_aliases + install_aliases() + from itertools import filterfalse, zip_longest diff --git a/docs/conf.py b/docs/conf.py index 1b2f5411..cf4606c7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -13,7 +13,7 @@ from __future__ import absolute_import, print_function import sys, os -import sphinx_bootstrap_theme +# import sphinx_bootstrap_theme # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -28,7 +28,14 @@ # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode'] +extensions = ['sphinx.ext.autodoc', + 'sphinx.ext.intersphinx', + 'sphinx.ext.ifconfig', + 'sphinx.ext.viewcode', + 'pallets_sphinx_themes', + # 'sphinxcontrib.napoleon' # see https://sphinxcontrib-napoleon.readthedocs.io/ + # 'sphinx.ext.napoleon' # use this in Sphinx 1.3+ + ] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -44,7 +51,7 @@ # General information about the project. 
project = u'Python-Future' -copyright = u'2013-2014, Python Charmers Pty Ltd, Australia' +copyright = u'2013-2019, Python Charmers Pty Ltd, Australia' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -52,8 +59,8 @@ # # if 'dev' in release: # release = release.split('dev')[0] + 'dev' -release = '0.11.0-dev' -version = '.'.join(release.split('.')[:2]) +# release = '0.12.5-dev' +# version = release # was: '.'.join(release.split('.')[:2]) # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -94,13 +101,75 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'bootstrap' -html_theme_path = sphinx_bootstrap_theme.get_html_theme_path() +html_theme = 'jinja' +# html_theme_path = sphinx_bootstrap_theme.get_html_theme_path() # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +html_theme_options = { + # Navigation bar title. (Default: ``project`` value) + #'navbar_title': "Python-Future", + + # Tab name for entire site. (Default: "Site") + 'navbar_site_name': "Contents", + + # A list of tuples containing pages or urls to link to. + # Valid tuples should be in the following forms: + # (name, page) # a link to a page + # (name, "/aa/bb", 1) # a link to an arbitrary relative url + # (name, "http://example.com", True) # arbitrary absolute url + # Note the "1" or "True" value above as the third argument to indicate + # an arbitrary url. + 'navbar_links': [ + ("Overview", "overview"), + ("Cheat Sheet", "compatible_idioms.html", True), + ("FAQ", "faq.html", True), + # ("Link", "http://example.com", True), + ], + + # Render the next and previous page links in navbar. 
(Default: true) + 'navbar_sidebarrel': False, + + # Render the current pages TOC in the navbar. (Default: true) + 'navbar_pagenav': True, + + # Global TOC depth for "site" navbar tab. (Default: 1) + # Switching to -1 shows all levels. + 'globaltoc_depth': 3, + + # Include hidden TOCs in Site navbar? + # + # Note: If this is "false", you cannot have mixed ``:hidden:`` and + # non-hidden ``toctree`` directives in the same page, or else the build + # will break. + # + # Values: "true" (default) or "false" + 'globaltoc_includehidden': "true", + + # HTML navbar class (Default: "navbar") to attach to
element. + # For black navbar, do "navbar navbar-inverse" + 'navbar_class': "navbar navbar-inverse", + + # Fix navigation bar to top of page? + # Values: "true" (default) or "false" + 'navbar_fixed_top': "true", + + # Location of link to source. + # Options are "nav" (default), "footer" or anything else to exclude. + 'source_link_position': "none", + + # Bootswatch (http://bootswatch.com/) theme. + # + # Options are nothing with "" (default) or the name of a valid theme + # such as "amelia" or "cosmo" or "united". + 'bootswatch_theme': "cerulean", + + # Choose Bootstrap version. + # Values: "3" (default) or "2" (in quotes) + 'bootstrap_version': "3", +} + # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] @@ -114,12 +183,12 @@ # The name of an image file (relative to this directory) to place at the top # of the sidebar. -html_logo = '_static/python-future-logo.png' +html_logo = '_static/python-future-logo-textless-transparent.png' # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +html_favicon = "_static/python-future-icon-32.ico" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, @@ -136,9 +205,12 @@ # Custom sidebar templates, maps document names to template names. 
html_sidebars = { - 'index': ['sidebarlogo.html', 'sidebarintro.html', - 'sourcelink.html', 'searchbox.html'], - '**': ['sidebarlogo.html', 'localtoc.html', 'relations.html', 'sourcelink.html', 'searchbox.html'] + '**': ['sidebarintro.html', + 'sidebartoc.html', + # 'sourcelink.html', + #'searchbox.html', + ] + # '**': ['sidebarlogo.html', 'localtoc.html', 'relations.html', 'sourcelink.html', 'searchbox.html'] } # Additional templates that should be rendered to pages, maps page names to @@ -155,10 +227,10 @@ #html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +html_show_sourcelink = False # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +html_show_sphinx = False # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. #html_show_copyright = True @@ -242,7 +314,7 @@ # dir menu entry, description, category) texinfo_documents = [ ('index', 'Python-Future', u'Python-Future Documentation', - u'Python Charmers', 'Python-Future', 'Easy support for Python 2 and 3', + u'Python Charmers', 'Python-Future', 'Easy compatibility for Python 2 and 3', 'Miscellaneous'), ] diff --git a/docs/contents.rst.inc b/docs/contents.rst.inc index e6935849..869b3642 100644 --- a/docs/contents.rst.inc +++ b/docs/contents.rst.inc @@ -1,25 +1,26 @@ -Contents: ---------- +Contents +======== .. toctree:: - :maxdepth: 2 + :maxdepth: 3 + whatsnew overview quickstart + compatible_idioms imports what_else automatic_conversion - porting - standard_library_incompatibilities faq + stdlib_incompatibilities + older_interfaces changelog credits reference Indices and tables ------------------- +****************** * :ref:`genindex` * :ref:`modindex` * :ref:`search` - diff --git a/docs/conversion_limitations.rst b/docs/conversion_limitations.rst new file mode 100644 index 00000000..c2b15303 --- /dev/null +++ b/docs/conversion_limitations.rst @@ -0,0 +1,27 @@ +.. 
_futurize-limitations: +Known limitations +----------------- + +``futurize`` and ``pasteurize`` are useful to automate much of the +work of porting, particularly the boring repetitive text substitutions. They also +help to flag which parts of the code require attention. + +Nevertheless, ``futurize`` and ``pasteurize`` are still incomplete and make +some mistakes, like 2to3, on which they are based. Please report bugs on +`GitHub <https://github.com/PythonCharmers/python-future>`_. Contributions to +the ``lib2to3``-based fixers for ``futurize`` and ``pasteurize`` are +particularly welcome! Please see :ref:`contributing`. + +``futurize`` doesn't currently make the following change automatically: + +1. Strings containing ``\U`` produce a ``SyntaxError`` on Python 3. An example is:: + + s = 'C:\Users'. + + Python 2 expands this to ``s = 'C:\\Users'``, but Python 3 requires a raw + prefix (``r'...'``). This also applies to multi-line strings (including + multi-line docstrings). + +Also see the tests in ``future/tests/test_futurize.py`` marked +``@expectedFailure`` or ``@skip`` for known limitations. diff --git a/docs/credits.rst b/docs/credits.rst index be1d6e25..4c029efd 100644 --- a/docs/credits.rst +++ b/docs/credits.rst @@ -1,41 +1,25 @@ -Credits -======= - -:Author: Ed Schofield -:Sponsor: Python Charmers Pty Ltd, Australia, and Python Charmers Pte - Ltd, Singapore. http://pythoncharmers.com -:Others: - The backported ``super()`` and ``range()`` functions are - derived from Ryan Kelly's ``magicsuper`` module and Dan - Crosta's ``xrange`` module. - - The ``futurize`` script uses ``lib2to3``, ``lib3to2``, and - parts of Armin Ronacher's ``python-modernize`` code. - - The ``python_2_unicode_compatible`` decorator is from - Django. The ``implements_iterator`` and ``with_metaclass`` - decorators are from Jinja2. - - ``future`` incorporates the ``six`` module by Benjamin - Peterson as ``future.utils.six``. 
- - Documentation is generated using ``sphinx`` using an - adaptation of Armin Ronacher's stylesheets from Jinja2. +Licensing and credits +===================== .. _licence: -Licensing ---------- +Licence +------- The software is distributed under an MIT licence. The text is as follows -(from LICENSE.txt): +(from ``LICENSE.txt``):: + + Copyright (c) 2013-2024 Python Charmers, Australia - Copyright (c) 2013-2014 Python Charmers Pty Ltd, Australia - Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -44,3 +28,154 @@ The software is distributed under an MIT licence. The text is as follows OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +.. _sponsor: + +Sponsors +-------- + +Python Charmers: https://pythoncharmers.com + +.. _authors: + +Author +------- + +Python-Future was largely written by Ed Schofield . + +Maintainers +----------- + +The project is no longer being actively maintained. Like Python 2, it should be +considered end-of-life. + +Past maintainers include: + +- Jordan M. 
Adler +- Liuyang Wan +- Ed Schofield + +Contributors +------------ + +Thanks to the following people for helping to improve the package: + +- Jordan Adler +- Jeroen Akkerman +- Bruno Alla +- Kyle Altendorf +- Nuno André +- Kian-Meng Ang +- Grant Bakker +- Jacob Beck +- David Bern +- Fumihiro (Ben) Bessho +- Shiva Bhusal +- Andrew Bjonnes +- Nate Bogdanowicz +- Tomer Chachamu +- Christian Clauss +- Denis Cornehl +- Joseph Curtis +- Nicolas Delaby +- Chad Dombrova +- Jon Dufresne +- Corey Farwell +- Eric Firing +- Joe Gordon +- Gabriela Gutierrez +- Maximilian Hils +- Tomáš Hrnčiar +- Miro Hrončok +- Mark Huang +- Martijn Jacobs +- Michael Joseph +- Waldemar Kornewald +- Alexey Kotlyarov +- Steve Kowalik +- Lion Krischer +- Marcin Kuzminski +- Joshua Landau +- German Larrain +- Chris Lasher +- ghanshyam lele +- Calum Lind +- Tobias Megies +- Anika Mukherji +- Jon Parise +- Matthew Parnell +- Tom Picton +- Sebastian Potasiak +- Miga Purg +- Éloi Rivard +- Greg Roodt +- Sesh Sadasivam +- Elliott Sales de Andrade +- Aiden Scandella +- Yury Selivanov +- Alexander Shadchin +- Tim Shaffer +- Christopher Slycord +- Sameera Somisetty +- Nicola Soranzo +- Louis Sautier +- Will Shanks +- Gregory P. Smith +- Chase Sterling +- Matthew Stidham +- Daniel Szoska +- Flaviu Tamas +- Roman A. Taycher +- Jeff Tratner +- Tim Tröndle +- Brad Walker +- Liuyang Wan +- Andrew Wason +- Jeff Widman +- Dan Yeaw +- Hackalog (GitHub user) +- lsm (GiHub user) +- Mystic-Mirage (GitHub user) +- str4d (GitHub user) +- ucodery (GitHub user) +- urain39 (GitHub user) +- 9seconds (GitHub user) +- Varriount (GitHub user) +- zihzihtw (GitHub user) + +Suggestions and Feedback +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Chris Adams +- Martijn Faassen +- Joe Gordon +- Lion Krischer +- Danielle Madeley +- Val Markovic +- wluebbe (GitHub user) + + +Other Credits +------------- + +- The backported ``super()`` and ``range()`` functions are derived from Ryan + Kelly's ``magicsuper`` module and Dan Crosta's ``xrange`` module. 
+ +- The ``futurize`` and ``pasteurize`` scripts use ``lib2to3``, ``lib3to2``, and + parts of Armin Ronacher's ``python-modernize`` code. + +- The ``python_2_unicode_compatible`` decorator is from Django. The + ``implements_iterator`` and ``with_metaclass`` decorators are from Jinja2. + +- The ``exec_`` function and some others in ``future.utils`` are from the + ``six`` module by Benjamin Peterson. + +- The ``raise_`` and ``raise_with_traceback`` functions were contributed by + Jeff Tratner. + +- A working version of ``raise_from`` was contributed by Varriount (GitHub). + +- Documentation is generated with `Sphinx `_ using the + ``sphinx-bootstrap`` theme. + +- ``past.translation`` is inspired by and borrows some code from Sanjay Vinip's + ``uprefix`` module. diff --git a/docs/custom_iterators.rst b/docs/custom_iterators.rst index 712e00a6..6ff389a4 100644 --- a/docs/custom_iterators.rst +++ b/docs/custom_iterators.rst @@ -3,26 +3,92 @@ Custom iterators ---------------- -If you define your own iterators, there is an incompatibility in the -method name across Py3 and Py2. On Python 3 it is ``__next__``, whereas -on Python 2 it is ``next``. +If you define your own iterators, there is an incompatibility in the method name +to retrieve the next item across Py3 and Py2. On Python 3 it is ``__next__``, +whereas on Python 2 it is ``next``. -Use the following decorator to allow Py3-style iterators to work -identically on Py2:: +The most elegant solution to this is to derive your custom iterator class from +``builtins.object`` and define a ``__next__`` method as you normally +would on Python 3. On Python 2, ``object`` then refers to the +``future.types.newobject`` base class, which provides a fallback ``next`` +method that calls your ``__next__``. 
Use it as follows:: - from future.utils import implements_iterator + from builtins import object - @implements_iterator class Upper(object): def __init__(self, iterable): self._iter = iter(iterable) - def __next__(self): # note the Py3 interface + def __next__(self): # Py3-style iterator interface + return next(self._iter).upper() + def __iter__(self): + return self + + itr = Upper('hello') + assert next(itr) == 'H' + assert next(itr) == 'E' + assert list(itr) == list('LLO') + + +You can use this approach unless you are defining a custom iterator as a +subclass of a base class defined elsewhere that does not derive from +``newobject``. In that case, you can provide compatibility across +Python 2 and Python 3 using the ``next`` function from ``future.builtins``:: + + from builtins import next + + from some_module import some_base_class + + class Upper2(some_base_class): + def __init__(self, iterable): + self._iter = iter(iterable) + def __next__(self): # Py3-style iterator interface return next(self._iter).upper() def __iter__(self): return self - print(list(Upper('hello'))) + itr2 = Upper2('hello') + assert next(itr2) == 'H' + assert next(itr2) == 'E' + +``next()`` also works with regular Python 2 iterators with a ``.next`` method:: + + itr3 = iter(['one', 'three', 'five']) + assert 'next' in dir(itr3) + assert next(itr3) == 'one' + +This approach is feasible whenever your code calls the ``next()`` function +explicitly. If you consume the iterator implicitly in a ``for`` loop or +``list()`` call or by some other means, the ``future.builtins.next`` function +will not help; the third assertion below would fail on Python 2:: + + itr2 = Upper2('hello') + + assert next(itr2) == 'H' + assert next(itr2) == 'E' + assert list(itr2) == list('LLO') # fails because Py2 implicitly looks + # for a ``next`` method. 
+ +Instead, you can use a decorator called ``implements_iterator`` from +``future.utils`` to allow Py3-style iterators to work identically on Py2, even +if they don't inherit from ``future.builtins.object``. Use it as follows:: + + from future.utils import implements_iterator + + Upper2 = implements_iterator(Upper2) + + print(list(Upper2('hello'))) # prints ['H', 'E', 'L', 'L', 'O'] -On Python 3 this decorator does nothing. +This can of course also be used with the ``@`` decorator syntax when defining +the iterator as follows:: + + @implements_iterator + class Upper2(some_base_class): + def __init__(self, iterable): + self._iter = iter(iterable) + def __next__(self): # note the Py3 interface + return next(self._iter).upper() + def __iter__(self): + return self +On Python 3, as usual, this decorator does nothing. diff --git a/docs/custom_str_methods.rst b/docs/custom_str_methods.rst index d45eb0dc..12c3c6b3 100644 --- a/docs/custom_str_methods.rst +++ b/docs/custom_str_methods.rst @@ -18,7 +18,7 @@ Py2 and define ``__str__`` to encode it as utf-8:: return u'Unicode string: \u5b54\u5b50' a = MyClass() - # This then prints the Chinese characters for Confucius: + # This then prints the name of a Chinese philosopher: print(a) This decorator is identical to the decorator of the same name in diff --git a/docs/dev_notes.rst b/docs/dev_notes.rst new file mode 100644 index 00000000..6985bca4 --- /dev/null +++ b/docs/dev_notes.rst @@ -0,0 +1,16 @@ +Notes +----- +This module only supports Python 2.7, and Python 3.4+. 
+ +The following renames are already supported on Python 2.7 without any +additional work from us:: + + reload() -> imp.reload() + reduce() -> functools.reduce() + StringIO.StringIO -> io.StringIO + Bytes.BytesIO -> io.BytesIO + +Old things that can one day be fixed automatically by futurize.py:: + + string.uppercase -> string.ascii_uppercase # works on either Py2.7 or Py3+ + sys.maxint -> sys.maxsize # but this isn't identical diff --git a/docs/development.rst b/docs/development.rst new file mode 100644 index 00000000..a12f2ca5 --- /dev/null +++ b/docs/development.rst @@ -0,0 +1,19 @@ +.. developer-docs + +Developer docs +============== + +The easiest way to start developing ``python-future`` is as follows: + +1. Install Anaconda Python distribution + +2. Run:: + + conda install -n future2 python=2.7 pip + conda install -n future3 python=3.4 pip + + git clone https://github.com/PythonCharmers/python-future + +3. If you are using Anaconda Python distribution, this comes without a ``test`` +module on Python 2.x. Copy ``Python-2.7.6/Lib/test`` from the Python source tree +to ``~/anaconda/envs/yourenvname/lib/python2.7/site-packages/``. diff --git a/docs/dict_object.rst b/docs/dict_object.rst index 4f3a594c..165cf763 100644 --- a/docs/dict_object.rst +++ b/docs/dict_object.rst @@ -7,85 +7,86 @@ Python 3 dictionaries have ``.keys()``, ``.values()``, and ``.items()`` methods which return memory-efficient set-like iterator objects, not lists. (See `PEP 3106 <https://www.python.org/dev/peps/pep-3106/>`_.) -``future.builtins`` provides a Python 2 ``dict`` subclass whose :func:`keys`, -:func:`values`, and :func:`items` methods return iterators. On Python 2.7, -these iterators have the same set-like view behaviour as dictionaries in -Python 3. This can streamline code needing to iterate over large dictionaries. 
-For example:: +If your dictionaries are small, performance is not critical, and you don't need +the set-like behaviour of iterator objects from Python 3, you can of course +stick with standard Python 3 code in your Py2/3 compatible codebase:: + + # Assuming d is a native dict ... + + for key in d: + # code here + + for item in d.items(): + # code here + + for value in d.values(): + # code here + +In this case there will be memory overhead of list creation on Py2 for each +call to ``items``, ``values`` or ``keys``. + +For improved efficiency, ``future.builtins`` (aliased to ``builtins``) provides +a Python 2 ``dict`` subclass whose :func:`keys`, :func:`values`, and +:func:`items` methods return iterators on all versions of Python >= 2.7. On +Python 2.7, these iterators also have the same set-like view behaviour as +dictionaries in Python 3. This can streamline code that iterates over large +dictionaries. For example:: from __future__ import print_function - from future.builtins import dict, range - + from builtins import dict, range + # Memory-efficient construction: d = dict((i, i**2) for i in range(10**7)) - + assert not isinstance(d.items(), list) - - # Because items() is memory-efficient, so is this: - d2 = dict((i_squared, i) for (i, i_squared) in d.items()) + # Because items() is memory-efficient, so is this: + d2 = dict((v, k) for (k, v) in d.items()) -On Python 2.6, these methods currently return iterators that do not support the -new Py3 set-like behaviour. +As usual, on Python 3 ``dict`` imported from either ``builtins`` or +``future.builtins`` is just the built-in ``dict`` class. Memory-efficiency and alternatives ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If you already have large native dictionaries, the downside to wrapping them in a -``dict`` call is that memory is copied (on both Py3 and with -``future.builtins.dict``). 
For example:: +If you already have large native dictionaries, the downside to wrapping them in +a ``dict`` call is that memory is copied (on both Py3 and on Py2). For +example:: - # Currently, this allocates and then frees a large amount of temporary - # memory: + # This allocates and then frees a large amount of temporary memory: d = dict({i: i**2 for i in range(10**7)}) If dictionary methods like ``values`` and ``items`` are called only once, this -obviously negates the memory benefits the overridden methods offer with not -creating temporary lists. +obviously negates the memory benefits offered by the overridden methods through +not creating temporary lists. -The memory-efficient (and CPU-efficient) alternatives are either:: +The memory-efficient (and CPU-efficient) alternatives are: -- to construct a dictionary from an iterator -- as above with the generator expression ``dict((i, i**2) for i in range(10**7)``; -- to construct an empty dictionary with a ``dict()`` call using ``future.builtins.dict`` (rather than ``{}``) and update it incrementally; -- to use +- to construct a dictionary from an iterator. The above line could use a + generator like this:: -If your dictionaries are small or performance is not critical, you can of course stick -with standard Python 3 code in your Py2/3 compatible codebase:: - - # Assuming d is a native dict ... + d = dict((i, i**2) for i in range(10**7)) - for item in d: - # code here +- to construct an empty dictionary with a ``dict()`` call using + ``builtins.dict`` (rather than ``{}``) and then update it; - for item in d.items(): - # code here - - for value in d.values(): - # code here - -In this case there will be memory overhead of list creation for each call of -``items``, ``values`` or ``keys``. - -If your dictionaries are large, or if you want to use the Python 3 -set-like behaviour on both Py3 and Python 2.7, then you can instead use the -``viewkeys`` etc. 
functions from :mod:`future.utils`, passing in regular -dictionaries:: +- to use the ``viewitems`` etc. functions from :mod:`future.utils`, passing in + regular dictionaries:: from future.utils import viewkeys, viewvalues, viewitems for (key, value) in viewitems(hugedictionary): # some code here - + # Set intersection: d = {i**2: i for i in range(1000)} both = viewkeys(d) & set(range(0, 1000, 7)) - + # Set union: both = viewvalues(d1) | viewvalues(d2) -For Python 2.6 compatibility, the functions ``iteritems`` etc. are also -available in :mod:`future.utils`. These are equivalent to the functions of the -same names in ``six``, which is equivalent to calling the ``iteritems`` etc. -methods on Python 2, or to calling ``items`` etc. on Python 3. - +For compatibility, the functions ``iteritems`` etc. are also available in +:mod:`future.utils`. These are equivalent to the functions of the same names in +``six``, which is equivalent to calling the ``iteritems`` etc. methods on +Python 2, or to calling ``items`` etc. on Python 3. diff --git a/docs/faq.rst b/docs/faq.rst index 93a74c49..e49adf61 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -4,15 +4,11 @@ Frequently Asked Questions (FAQ) Who is this for? ================ -1. People who would prefer to write clean, future-proof Python -3-compatible code, but whose day-jobs require that their code still run -on a Python 2 stack. +1. People with existing or new Python 3 codebases who wish to provide +ongoing Python 2.7 support easily and with little maintenance burden. -2. People who wish to simplify migration of their codebases to Python -3.3+, module by module, without giving up Python 2 compatibility. - -3. People with existing or new Python 3 codebases who wish to provide -ongoing Python 2.6 / 2.7 support easily and with little maintenance burden. +2. People who wish to ease and accelerate migration of their Python 2 codebases +to Python 3.3+, module by module, without giving up Python 2 compatibility. 
Why upgrade to Python 3? @@ -28,23 +24,23 @@ Python 2.7 is the end of the Python 2 line. (See `PEP 404 `_.) The language and standard libraries are improving only in Python 3.x. -Python 3.3 is a better language and better set of standard libraries than -Python 2.x in almost every way. Python 3 is cleaner, less warty, and easier to +Python 3.x is a better language and better set of standard libraries than +Python 2.x in many ways. Python 3.x is cleaner, less warty, and easier to learn than Python 2. It has better memory efficiency, easier Unicode handling, -and powerful new features like function annotations and the `asyncio -`_ module. +and powerful new features like the `asyncio +`_ module. .. Unicode handling is also much easier. For example, see `this page .. `_ .. describing some of the problems with handling Unicode on Python 2 that -.. Python 3 mostly solves. +.. Python 3 mostly solves. Porting philosophy ================== -Why use this approach? ----------------------- +Why write Python 3-style code? +------------------------------ Here are some quotes: @@ -65,14 +61,15 @@ Here are some quotes: difficulties associated with the Python 3 transition (like distinguishing their 8-bit text strings from their binary data). They shouldn't be punished with additional code changes ..." from `PEP 414 - `_ by Nick Coghlan. + `_ by Armin Ronacher and Nick + Coghlan. Can't I just roll my own Py2/3 compatibility layer? --------------------------------------------------- -Yes, but using ``future`` will probably lead to cleaner code with fewer -bugs. +Yes, but using ``python-future`` will probably be easier and lead to cleaner +code with fewer bugs. Consider this quote: @@ -87,64 +84,73 @@ Consider this quote: ``future`` also includes various Py2/3 compatibility tools in :mod:`future.utils` picked from large projects (including IPython, -Django, Jinja2, Pandas), which should hopefully reduce the burden on -every project to roll its own py3k compatibility wrapper module. 
+Django, Jinja2, Pandas), which should reduce the burden on every project to +roll its own py3k compatibility wrapper module. + +What inspired this project? +--------------------------- -How did the original need for this arise? ------------------------------------------ +In our Python training courses, we at `Python Charmers +`_ faced a dilemma: teach people Python 3, which was +future-proof but not as useful to them today because of weaker 3rd-party +package support, or teach people Python 2, which was more useful today but +would require them to change their code and unlearn various habits soon. We +searched for ways to avoid polluting the world with more deprecated code, but +didn't find a good way. -In teaching Python, we at Python Charmers faced a dilemma: teach people -Python 3, which was future-proof but not as useful to them today because -of weaker 3rd-party package support, or teach people Python 2, which was -more useful today but would require them to change their code and unlearn -various habits soon. We searched for ways to avoid polluting the world -with more deprecated code, but didn't find a good way. +Also, in attempting to help with porting packages such as `scikit-learn +`_ to Python 3, I (Ed) was dissatisfied with how much +code cruft was necessary to introduce to support Python 2 and 3 from a single +codebase (the preferred porting option). Since backward-compatibility with +Python 2 may be necessary for at least the next 5 years, one of the promised +benefits of Python 3 -- cleaner code with fewer of Python 2's warts -- was +difficult to realize before in practice in a single codebase that supported +both platforms. -Also, in attempting to help with porting packages such as -``scikit-learn`` to Python 3, I was dissatisfied with how much code cruft -was necessary to introduce to support Python 2 and 3 from a single -codebase (the preferred porting option). 
Since backward-compatibility -with Python 2 may be necessary for at least the next 5 years, one of the -promised benefits of Python 3 -- cleaner code with fewer of Python 2's -warts -- was difficult to realize before in practice in a single codebase -that supported both platforms. +The goal is to accelerate the uptake of Python 3 and help the strong Python +community to remain united around a single version of the language. Maturity ======== -Is it tested? -------------- +How well has it been tested? +---------------------------- + +``future`` is used by thousands of projects and has been downloaded over 1.7 billion times. Some projects like Sage have used it to port 800,000+ lines of Python 2 code to Python 2/3. + +Currently ``python-future`` has over 1000 unit tests. Many of these are straight +from the Python 3.3 and 3.4 test suites. + +In general, the ``future`` package itself is in good shape, whereas the +``futurize`` script for automatic porting is imperfect; chances are it will +require some manual cleanup afterwards. The ``past`` package also needs to be +expanded. + -``future`` currently has 300+ unit tests. In general, the ``future`` package -itself is in good shape, whereas the ``futurize`` script for automatic porting -is incomplete and imperfect. (Chances are it will require some manual cleanup -afterwards.) - Is the API stable? ------------------ -Not yet; ``future`` is still in beta. We will try not to break anything which -was documented and used to work. After version 1.0 is released, the API will -not change in backward-incompatible ways until a hypothetical version 2.0. +Yes; ``future`` is mature. We'll make very few changes from here, trying not to +break anything which was documented and used to work. .. Are there any example of Python 2 packages ported to Python 3 using ``future`` and ``futurize``? 
------------------------------------------------------------------------------------------------ - + Yes, an example is the port of ``xlwt``, available `here `_. - + The code also contains backports for several Py3 standard library modules under ``future/standard_library/``. -Relationship between ``future`` and other compatibility tools -============================================================= +Relationship between python-future and other compatibility tools +================================================================ -How does this relate to ``2to3`` and ``lib2to3``? -------------------------------------------------- +How does this relate to ``2to3``? +--------------------------------- ``2to3`` is a powerful and flexible tool that can produce different styles of Python 3 code. It is, however, primarily designed for one-way @@ -166,13 +172,11 @@ most inputs; worse, it allows arbitrary code execution by the user for specially crafted inputs because of the ``eval()`` executed by Python 2's ``input()`` function. -This is not an isolated example; almost every output of ``2to3`` will -need modification to provide backward compatibility with Python 2. -``future`` is designed for just this purpose. - -The ``future`` source tree contains a script called ``futurize`` that is -based on ``lib2to3``. It is designed to turn either Python 2-only or -Python 3-only code into code that is compatible with both platforms. +This is not an isolated example; almost every output of ``2to3`` will need +modification to provide backward compatibility with Python 2. As an +alternative, the ``python-future`` project provides a script called +``futurize`` that is based on ``lib2to3`` but will produce code that is +compatible with both platforms (Py2 and Py3). Can I maintain a Python 2 codebase and use 2to3 to automatically convert to Python 3 in the setup script? 
@@ -180,12 +184,12 @@ Can I maintain a Python 2 codebase and use 2to3 to automatically convert to Pyth This was originally the approach recommended by Python's core developers, but it has some large drawbacks: - + 1. First, your actual working codebase will be stuck with Python 2's warts and smaller feature set for as long as you need to retain Python 2 compatibility. This may be at least 5 years for many projects, possibly much longer. - + 2. Second, this approach carries the significant disadvantage that you cannot apply patches submitted by Python 3 users against the auto-generated Python 3 code. (See `this talk @@ -195,41 +199,44 @@ auto-generated Python 3 code. (See `this talk What is the relationship between ``future`` and ``six``? -------------------------------------------------------- -``future`` is a higher-level compatibility layer than ``six`` that -includes more backported functionality from Python 3 and supports cleaner -code but requires more modern Python versions to run. +``python-future`` is a higher-level compatibility layer than ``six`` that +includes more backported functionality from Python 3, more forward-ported +functionality from Python 2, and supports cleaner code, but requires more +modern Python versions to run. -``future`` and ``six`` share the same goal of making it possible to write +``python-future`` and ``six`` share the same goal of making it possible to write a single-source codebase that works on both Python 2 and Python 3. -``future`` has the further goal of allowing standard Py3 code to run with +``python-future`` has the further goal of allowing standard Py3 code to run with almost no modification on both Py3 and Py2. ``future`` provides a more complete set of support for Python 3's features, including backports of Python 3 builtins such as the ``bytes`` object (which is very different to Python 2's ``str`` object) and several standard library modules. 
-``future`` supports only Python 2.6+ and Python 3.3+, whereas ``six`` +``python-future`` supports only Python 2.7+ and Python 3.4+, whereas ``six`` supports all versions of Python from 2.4 onwards. (See :ref:`supported-versions`.) If you must support older Python versions, -``six`` will be esssential for you. However, beware that maintaining +``six`` will be essential for you. However, beware that maintaining single-source compatibility with older Python versions is ugly and `not fun `_. -If you can drop support for older Python versions, ``future`` leverages -some important features introduced into Python 2.6 and 2.7, such as -import hooks, to allow you to write more idiomatic, maintainable code. +If you can drop support for older Python versions, ``python-future`` leverages +some important features introduced into Python 2.7, such as +import hooks, and a comprehensive and well-tested set of backported +functionality, to allow you to write more idiomatic, maintainable code with +fewer compatibility hacks. -What is the relationship between this project and ``python-modernize``? ------------------------------------------------------------------------ +What is the relationship between ``python-future`` and ``python-modernize``? +---------------------------------------------------------------------------- ``python-future`` contains, in addition to the ``future`` compatibility package, a ``futurize`` script that is similar to ``python-modernize.py`` in intent and design. Both are based heavily on ``2to3``. - + Whereas ``python-modernize`` converts Py2 code into a common subset of Python 2 and 3, with ``six`` as a run-time dependency, ``futurize`` converts either Py2 or Py3 code into (almost) standard Python 3 code, -with ``future`` as a run-time dependency. +with ``future`` as a run-time dependency. Because ``future`` provides more backported Py3 behaviours from ``six``, the code resulting from ``futurize`` is more likely to work @@ -238,25 +245,21 @@ effort. 
Platform and version support ----------------------------- +============================ .. _supported-versions: -Which versions of Python does ``future`` support? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Python 2.6, 2.7, and 3.3+ only. +Which versions of Python does ``python-future`` support? +-------------------------------------------------------- -Python 2.6 and 2.7 introduced many important forward-compatibility -features (such as import hooks, ``b'...'`` literals and ``__future__`` -definitions) that greatly reduce the maintenance burden for single-source -Py2/3 compatible code. ``future`` leverages these features and aims to -close the remaining gap between Python 3 and 2.6 / 2.7. +Python 2.6 and 3.3+ only. Python 2.7 and Python 3.4+ are preferred. -Python 3.2 could perhaps be supported too, although the illegal unicode -literal ``u'...'`` syntax may be a drawback. The Py3.2 userbase is -very small, however. Please let us know if you would like to see Py3.2 -support. +You may be able to use Python 2.6 but writing Py2/3 compatible code is not as +easy. Python 2.7 introduced many important forward-compatibility features (such +as import hooks, ``b'...'`` literals and ``__future__`` definitions) that +greatly reduce the maintenance burden for single-source Py2/3 compatible code. +``future`` leverages these features and aims to close the remaining gap between +Python 3 and 2.7. Do you support Pypy? @@ -267,7 +270,7 @@ and pull requests are welcome! Do you support IronPython and/or Jython? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Not sure. This would be nice... @@ -280,8 +283,7 @@ Support Is there a mailing list? ------------------------ -Yes, please ask any questions on the `python-porting -`_ mailing list. +There was a `python-porting` mailing list, but it's now dead. .. _contributing: @@ -293,12 +295,13 @@ Can I help? 
----------- Yes please :) We welcome bug reports, additional tests, pull requests, -and stories of either success or failure with using it. Help with the fixers -for the ``futurize`` script is particularly welcome. +and stories of either success or failure with using it. + +However, please note that the project is not very actively maintained. It +should be considered done, like Python 2. Where is the repo? ------------------ ``_. - diff --git a/docs/future-builtins.rst b/docs/future-builtins.rst new file mode 100644 index 00000000..df8ff79d --- /dev/null +++ b/docs/future-builtins.rst @@ -0,0 +1,17 @@ +.. _future-builtins: + +``future.builtins`` +=================== + +The ``future.builtins`` module is also accessible as ``builtins`` on Py2. + +- ``pow()`` supports fractional exponents of negative numbers like in Py3:: + + >>> from builtins import pow + >>> pow(-1, 0.5) + (6.123233995736766e-17+1j) + +- ``round()`` uses Banker's Rounding as in Py3 to the nearest even last digit:: + + >>> from builtins import round + >>> assert round(0.1250, 2) == 0.12 diff --git a/docs/futurize.rst b/docs/futurize.rst new file mode 100644 index 00000000..11520a6c --- /dev/null +++ b/docs/futurize.rst @@ -0,0 +1,314 @@ +.. _forwards-conversion: + +``futurize``: Py2 to Py2/3 +-------------------------- + +.. include:: futurize_overview.rst + + +.. _forwards-conversion-stage1: + +Stage 1: "safe" fixes +~~~~~~~~~~~~~~~~~~~~~ + +Run the first stage of the conversion process with:: + + futurize --stage1 mypackage/*.py + +or, if you are using zsh, recursively:: + + futurize --stage1 mypackage/**/*.py + +This applies fixes that modernize Python 2 code without changing the effect of +the code. With luck, this will not introduce any bugs into the code, or will at +least be trivial to fix. The changes are those that bring the Python code +up-to-date without breaking Py2 compatibility. 
The resulting code will be +modern Python 2.7-compatible code plus ``__future__`` imports from the +following set: + +.. code-block:: python + + from __future__ import absolute_import + from __future__ import division + from __future__ import print_function + +Only those ``__future__`` imports deemed necessary will be added unless +the ``--all-imports`` command-line option is passed to ``futurize``, in +which case they are all added. + +The ``from __future__ import unicode_literals`` declaration is not added +unless the ``--unicode-literals`` flag is passed to ``futurize``. + +The changes include:: + + - except MyException, e: + + except MyException as e: + + - print >>stderr, "Blah" + + from __future__ import print_function + + print("Blah", stderr) + + - class MyClass: + + class MyClass(object): + + - def next(self): + + def __next__(self): + + - if d.has_key(key): + + if key in d: + +Implicit relative imports fixed, e.g.:: + + - import mymodule + + from __future__ import absolute_import + + from . import mymodule + +.. and all unprefixed string literals '...' gain a b prefix to be b'...'. + +.. (This last step can be prevented using --no-bytes-literals if you already have b'...' markup in your code, whose meaning would otherwise be lost.) + +Stage 1 does not add any imports from the ``future`` package. The output of +stage 1 will probably not (yet) run on Python 3. + +The goal for this stage is to create most of the ``diff`` for the entire +porting process, but without introducing any bugs. It should be uncontroversial +and safe to apply to every Python 2 package. The subsequent patches introducing +Python 3 compatibility should then be shorter and easier to review. + +The complete set of fixers applied by ``futurize --stage1`` is: + +.. 
code-block:: python + + lib2to3.fixes.fix_apply + lib2to3.fixes.fix_except + lib2to3.fixes.fix_exec + lib2to3.fixes.fix_exitfunc + lib2to3.fixes.fix_funcattrs + lib2to3.fixes.fix_has_key + lib2to3.fixes.fix_idioms + lib2to3.fixes.fix_intern + lib2to3.fixes.fix_isinstance + lib2to3.fixes.fix_methodattrs + lib2to3.fixes.fix_ne + lib2to3.fixes.fix_numliterals + lib2to3.fixes.fix_paren + lib2to3.fixes.fix_reduce + lib2to3.fixes.fix_renames + lib2to3.fixes.fix_repr + lib2to3.fixes.fix_standarderror + lib2to3.fixes.fix_sys_exc + lib2to3.fixes.fix_throw + lib2to3.fixes.fix_tuple_params + lib2to3.fixes.fix_types + lib2to3.fixes.fix_ws_comma + lib2to3.fixes.fix_xreadlines + libfuturize.fixes.fix_absolute_import + libfuturize.fixes.fix_next_call + libfuturize.fixes.fix_print_with_import + libfuturize.fixes.fix_raise + +The following fixers from ``lib2to3`` are not applied: + +.. code-block:: python + + lib2to3.fixes.fix_import + +The ``fix_absolute_import`` fixer in ``libfuturize.fixes`` is applied instead of +``lib2to3.fixes.fix_import``. The new fixer both makes implicit relative +imports explicit and adds the declaration ``from __future__ import +absolute_import`` at the top of each relevant module. + +.. code-block:: python + + lib2to3.fixes.fix_next + +The ``fix_next_call`` fixer in ``libfuturize.fixes`` is applied instead of +``fix_next`` in stage 1. The new fixer changes any ``obj.next()`` calls to +``next(obj)``, which is Py2/3 compatible, but doesn't change any ``next`` method +names to ``__next__``, which would break Py2 compatibility. + +``fix_next`` is applied in stage 2. + +.. code-block:: python + + lib2to3.fixes.fix_print + +The ``fix_print_with_import`` fixer in ``libfuturize.fixes`` changes the code to +use print as a function and also adds ``from __future__ import +print_function`` to the top of modules using ``print()``. + +In addition, it avoids adding an extra set of parentheses if these already +exist. So ``print(x)`` does not become ``print((x))``. 
+ +.. code-block:: python + + lib2to3.fixes.fix_raise + +This fixer translates code to use the Python 3-only ``with_traceback()`` +method on exceptions. + +.. code-block:: python + + lib2to3.fixes.fix_set_literal + +This converts ``set([1, 2, 3])`` to ``{1, 2, 3}``. + +.. code-block:: python + + lib2to3.fixes.fix_ws_comma + +This performs cosmetic changes. This is not applied by default because it +does not serve to improve Python 2/3 compatibility. (In some cases it may +also reduce readability: see issue #58.) + + + +.. _forwards-conversion-stage2: + +Stage 2: Py3-style code with wrappers for Py2 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Run stage 2 of the conversion process with:: + + futurize --stage2 myfolder/*.py + +This stage adds a dependency on the ``future`` package. The goal for stage 2 is +to make further mostly safe changes to the Python 2 code to use Python 3-style +code that then still runs on Python 2 with the help of the appropriate builtins +and utilities in ``future``. 
+ +For example:: + + name = raw_input('What is your name?\n') + + for k, v in d.iteritems(): + assert isinstance(v, basestring) + + class MyClass(object): + def __unicode__(self): + return u'My object' + def __str__(self): + return unicode(self).encode('utf-8') + +would be converted by Stage 2 to this code:: + + from builtins import input + from builtins import str + from future.utils import iteritems, python_2_unicode_compatible + + name = input('What is your name?\n') + + for k, v in iteritems(d): + assert isinstance(v, (str, bytes)) + + @python_2_unicode_compatible + class MyClass(object): + def __str__(self): + return u'My object' + +Stage 2 also renames standard-library imports to their Py3 names and adds these +two lines:: + + from future import standard_library + standard_library.install_aliases() + +For example:: + + import ConfigParser + +becomes:: + + from future import standard_library + standard_library.install_aliases() + import configparser + +The complete list of fixers applied in Stage 2 is:: + + lib2to3.fixes.fix_dict + lib2to3.fixes.fix_filter + lib2to3.fixes.fix_getcwdu + lib2to3.fixes.fix_input + lib2to3.fixes.fix_itertools + lib2to3.fixes.fix_itertools_imports + lib2to3.fixes.fix_long + lib2to3.fixes.fix_map + lib2to3.fixes.fix_next + lib2to3.fixes.fix_nonzero + lib2to3.fixes.fix_operator + lib2to3.fixes.fix_raw_input + lib2to3.fixes.fix_zip + + libfuturize.fixes.fix_basestring + libfuturize.fixes.fix_cmp + libfuturize.fixes.fix_division_safe + libfuturize.fixes.fix_execfile + libfuturize.fixes.fix_future_builtins + libfuturize.fixes.fix_future_standard_library + libfuturize.fixes.fix_future_standard_library_urllib + libfuturize.fixes.fix_metaclass + libpasteurize.fixes.fix_newstyle + libfuturize.fixes.fix_object + libfuturize.fixes.fix_unicode_keep_u + libfuturize.fixes.fix_xrange_with_import + + +Not applied:: + + lib2to3.fixes.fix_buffer # Perhaps not safe. Test this. 
+ lib2to3.fixes.fix_callable # Not needed in Py3.2+ + lib2to3.fixes.fix_execfile # Some problems: see issue #37. + # We use the custom libfuturize.fixes.fix_execfile instead. + lib2to3.fixes.fix_future # Removing __future__ imports is bad for Py2 compatibility! + lib2to3.fixes.fix_imports # Called by libfuturize.fixes.fix_future_standard_library + lib2to3.fixes.fix_imports2 # We don't handle this yet (dbm) + lib2to3.fixes.fix_metaclass # Causes SyntaxError in Py2! Use the one from ``six`` instead + lib2to3.fixes.fix_unicode # Strips off the u'' prefix, which removes a potentially + # helpful source of information for disambiguating + # unicode/byte strings. + lib2to3.fixes.fix_urllib # Included in libfuturize.fix_future_standard_library_urllib + lib2to3.fixes.fix_xrange # Custom one because of a bug with Py3.3's lib2to3 + + + +.. Ideally the output of this stage should not be a ``SyntaxError`` on either +.. Python 3 or Python 2. + +.. _forwards-conversion-text: + +Separating text from bytes +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +After applying stage 2, the recommended step is to decide which of your Python +2 strings represent text and which represent binary data and to prefix all +string literals with either ``b`` or ``u`` accordingly. Furthermore, to ensure +that these types behave similarly on Python 2 as on Python 3, also wrap +byte-strings or text in the ``bytes`` and ``str`` types from ``future``. For +example:: + + from builtins import bytes, str + b = bytes(b'\x00ABCD') + s = str(u'This is normal text') + +Any unadorned string literals will then represent native platform strings +(byte-strings on Py2, unicode strings on Py3). + +An alternative is to pass the ``--unicode-literals`` flag:: + + $ futurize --unicode-literals mypython2script.py + +After running this, all string literals that were not explicitly marked up as +``b''`` will mean text (Python 3 ``str`` or Python 2 ``unicode``). + + + +.. 
_forwards-conversion-stage3: + +Post-conversion +~~~~~~~~~~~~~~~ + +After running ``futurize``, we recommend first running your tests on Python 3 and making further code changes until they pass on Python 3. + +The next step would be manually tweaking the code to re-enable Python 2 +compatibility with the help of the ``future`` package. For example, you can add +the ``@python_2_unicode_compatible`` decorator to any classes that define custom +``__str__`` methods. See :ref:`what-else` for more info. diff --git a/docs/futurize_cheatsheet.rst b/docs/futurize_cheatsheet.rst new file mode 100644 index 00000000..82f211c6 --- /dev/null +++ b/docs/futurize_cheatsheet.rst @@ -0,0 +1,124 @@ +.. _futurize_cheatsheet: + +``futurize`` quick-start guide +------------------------------ + +How to convert Py2 code to Py2/3 code using ``futurize``: + +.. _porting-setup: + +Step 0: setup +~~~~~~~~~~~~~ + +Step 0 goal: set up and see the tests passing on Python 2 and failing on Python 3. + +a. Clone the package from github/bitbucket. Optionally rename your repo to ``package-future``. Examples: ``reportlab-future``, ``paramiko-future``, ``mezzanine-future``. +b. Create and activate a Python 2 conda environment or virtualenv. Install the package with ``python setup.py install`` and run its test suite on Py2.7 (e.g. ``python setup.py test`` or ``py.test``) +c. Optionally: if there is a ``.travis.yml`` file, add Python version 3.6 and remove any versions < 2.6. +d. Install Python 3 with e.g. ``sudo apt-get install python3``. On other platforms, an easy way is to use `Miniconda `_. Then e.g.:: + + conda create -n py36 python=3.6 pip + +.. _porting-step1: + +Step 1: modern Py2 code +~~~~~~~~~~~~~~~~~~~~~~~ + +The goal for this step is to modernize the Python 2 code without introducing any dependencies (on ``future`` or e.g. ``six``) at this stage. + +**1a**. Install ``future`` into the virtualenv using:: + + pip install future + +**1b**. 
Run ``futurize --stage1 -w *.py subdir1/*.py subdir2/*.py``. Note that with +recursive globbing in ``bash`` or ``zsh``, you can apply stage 1 to all source files +recursively with:: + + futurize --stage1 -w . + +**1c**. Commit all changes + +**1d**. Re-run the test suite on Py2 and fix any errors. + +See :ref:`forwards-conversion-stage1` for more info. + + +Example error +************* + +One relatively common error after conversion is:: + + Traceback (most recent call last): + ... + File "/home/user/Install/BleedingEdge/reportlab/tests/test_encrypt.py", line 19, in + from .test_pdfencryption import parsedoc + ValueError: Attempted relative import in non-package + +If you get this error, try adding an empty ``__init__.py`` file in the package +directory. (In this example, in the tests/ directory.) If this doesn’t help, +and if this message appears for all tests, they must be invoked differently +(from the cmd line or e.g. ``setup.py``). The way to run a module inside a +package on Python 3, or on Python 2 with ``absolute_import`` in effect, is:: + + python -m tests.test_platypus_xref + +(For more info, see `PEP 328 `_ and +the `PEP 8 `_ section on absolute +imports.) + + +.. _porting-step2: + +Step 2: working Py3 code that still supports Py2 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The goal for this step is to get the tests passing first on Py3 and then on Py2 +again with the help of the ``future`` package. + +**2a**. Run:: + + futurize --stage2 myfolder1/*.py myfolder2/*.py + +You can view the stage 2 changes to all Python source files recursively with:: + + futurize --stage2 . + +To apply the changes, add the ``-w`` argument. + +This stage makes further conversions needed to support both Python 2 and 3. +These will likely require imports from ``future`` on Py2 (and sometimes on Py3), +such as:: + + from future import standard_library + standard_library.install_aliases() + # ... 
+ from builtins import bytes + from builtins import open + from future.utils import with_metaclass + +Optionally, you can use the ``--unicode-literals`` flag to add this import to +the top of each module:: + + from __future__ import unicode_literals + +All strings in the module would then be unicode on Py2 (as on Py3) unless +explicitly marked with a ``b''`` prefix. + +If you would like ``futurize`` to import all the changed builtins to have their +Python 3 semantics on Python 2, invoke it like this:: + + futurize --stage2 --all-imports myfolder/*.py + + +**2b**. Re-run your tests on Py3 now. Make changes until your tests pass on Python 3. + +**2c**. Commit your changes! :) + +**2d**. Now run your tests on Python 2 and notice the errors. Add wrappers from +``future`` to re-enable Python 2 compatibility. See the +:ref:`compatible-idioms` cheat sheet and :ref:`what-else` for more info. + +After each change, re-run the tests on Py3 and Py2 to ensure they pass on both. + +**2e**. You're done! Celebrate! Push your code and announce to the world! Hashtags +#python3 #python-future. diff --git a/docs/futurize_overview.rst b/docs/futurize_overview.rst new file mode 100644 index 00000000..769b65c7 --- /dev/null +++ b/docs/futurize_overview.rst @@ -0,0 +1,55 @@ +The ``futurize`` script passes Python 2 code through all the appropriate fixers +to turn it into valid Python 3 code, and then adds ``__future__`` and +``future`` package imports to re-enable compatibility with Python 2. + +For example, running ``futurize`` turns this Python 2 code: + +.. code-block:: python + + import ConfigParser # Py2 module name + + class Upper(object): + def __init__(self, iterable): + self._iter = iter(iterable) + def next(self): # Py2-style iterator interface + return next(self._iter).upper() + def __iter__(self): + return self + + itr = Upper('hello') + print next(itr), + for letter in itr: + print letter, # Py2-style print statement + +into this code which runs on both Py2 and Py3: + +.. 
code-block:: python + + from __future__ import print_function + from future import standard_library + standard_library.install_aliases() + from future.builtins import next + from future.builtins import object + import configparser # Py3-style import + + class Upper(object): + def __init__(self, iterable): + self._iter = iter(iterable) + def __next__(self): # Py3-style iterator interface + return next(self._iter).upper() + def __iter__(self): + return self + + itr = Upper('hello') + print(next(itr), end=' ') # Py3-style print function + for letter in itr: + print(letter, end=' ') + + +To write out all the changes to your Python files that ``futurize`` suggests, +use the ``-w`` flag. + +For complex projects, it is probably best to divide the porting into two stages. +Stage 1 is for "safe" changes that modernize the code but do not break Python +2.7 compatibility or introduce a dependency on the ``future`` package. Stage 2 +is to complete the process. diff --git a/docs/hindsight.rst b/docs/hindsight.rst index a7b283a1..b4654c6a 100644 --- a/docs/hindsight.rst +++ b/docs/hindsight.rst @@ -1,4 +1,3 @@ In a perfect world, the new metaclass syntax should ideally be available in Python 2 as a `__future__`` import like ``from __future__ import new_metaclass_syntax``. - diff --git a/docs/imports.rst b/docs/imports.rst index 5800bea4..f7dcd9fc 100644 --- a/docs/imports.rst +++ b/docs/imports.rst @@ -3,10 +3,10 @@ Imports ======= -.. ___future__-imports: +.. _-__future__-imports: __future__ imports -~~~~~~~~~~~~~~~~~~ +------------------ To write a Python 2/3 compatible codebase, the first step is to add this line to the top of each module:: @@ -24,28 +24,29 @@ standard feature of Python, see the following docs: - print_function: `PEP 3105: Make print a function `_ - unicode_literals: `PEP 3112: Bytes literals in Python 3000 `_ -These are all available in Python 2.6 and up, and enabled by default in Python 3.x. 
+These are all available in Python 2.7 and up, and enabled by default in Python 3.x. -.. _star-imports: +.. _builtins-imports: + +Imports of builtins +------------------- -Star imports -~~~~~~~~~~~~ +.. _star-imports: -If you don't mind namespace pollution on Python 2, the easiest way to provide -Py2/3 compatibility for new code using ``future`` is to include the following -imports at the top of every module:: +Implicit imports +~~~~~~~~~~~~~~~~ - from future.builtins import * +If you don't mind namespace pollution, the easiest way to provide Py2/3 +compatibility for new code using ``future`` is to include the following imports +at the top of every module:: -together with these module imports when necessary:: - - from future import standard_library, utils + from builtins import * -On Python 3, ``from future.builtins import *`` line has zero effect and zero -namespace pollution. +On Python 3, this has no effect. (It shadows builtins with globals of the same +names.) -On Python 2, this import line shadows 16 builtins (listed below) to +On Python 2, this import line shadows 18 builtins (listed below) to provide their Python 3 semantics. @@ -55,40 +56,45 @@ Explicit imports ~~~~~~~~~~~~~~~~ Explicit forms of the imports are often preferred and are necessary for using -some automated code-analysis tools. +certain automated code-analysis tools. -The most common imports from ``future`` are:: - - from future import standard_library, utils - from future.builtins import (bytes, int, range, round, str, super, - ascii, chr, hex, input, oct, open, - filter, map, zip) +The complete set of imports of builtins from ``future`` is:: -The disadvantage of importing only some of the builtins is that it -increases the risk of introducing Py2/3 portability bugs as your code -evolves over time. Be especially aware of not importing ``input``, which could -expose a security vulnerability on Python 2 if Python 3's semantics are -expected. 
+ from builtins import (ascii, bytes, chr, dict, filter, hex, input, + int, map, next, oct, open, pow, range, round, + str, super, zip) -One further technical distinction is that unlike the ``import *`` form above, -these explicit imports do actually change ``locals()``; this is equivalent -to typing ``bytes = bytes; int = int`` etc. for each builtin. +These are also available under the ``future.builtins`` namespace for backward compatibility. + +Importing only some of the builtins is cleaner but increases the risk of +introducing Py2/3 portability bugs as your code evolves over time. For example, +be aware of forgetting to import ``input``, which could expose a security +vulnerability on Python 2 if Python 3's semantics are expected. + +.. One further technical distinction is that unlike the ``import *`` form above, +.. these explicit imports do actually modify ``locals()`` on Py3; this is +.. equivalent to typing ``bytes = bytes; int = int`` etc. for each builtin. The internal API is currently as follows:: - from future.builtins.backports import bytes, int, range, round, str, super - from future.builtins.misc import ascii, chr, hex, input, oct, open + from future.types import bytes, dict, int, range, str + from future.builtins.misc import (ascii, chr, hex, input, next, + oct, open, pow, round, super) from future.builtins.iterators import filter, map, zip -To understand the details of the backported builtins on Python 2, see the -docs for these modules. Please note that this internal API is evolving and may -not be stable between different versions of ``future``. +Please note that this internal API is evolving and may not be stable between +different versions of ``future``. To understand the details of the backported +builtins on Python 2, see the docs for these modules. + +For more information on what the backported types provide, see :ref:`what-else`. + +.. < Section about past.translation is included here > .. 
_obsolete-builtins: Obsolete Python 2 builtins -~~~~~~~~~~~~~~~~~~~~~~~~~~ +__________________________ Twelve Python 2 builtins have been removed from Python 3. To aid with porting code to Python 3 module by module, you can use the following @@ -109,202 +115,12 @@ equivalent Python 3 forms and then adds ``future`` imports to resurrect Python 2 support, as described in :ref:`forwards-conversion-stage2`. -.. _unicode-literals: - -Should I import unicode_literals? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The ``future`` package can be used with or without ``unicode_literals`` -imports. - -There is some contention in the community about whether it is advisable -to import ``unicode_literals`` from ``__future__`` in a Python 2/3 -compatible codebase. - -It is more compelling to use ``unicode_literals`` when back-porting -new or existing Python 3 code to Python 2/3. For porting existing Python 2 -code to 2/3, explicitly marking up all unicode string literals with ``u''`` -prefixes helps to avoid unintentionally changing an existing Python 2 API. - -If you use ``unicode_literals``, testing and debugging your code with -*Python 3* first is probably the easiest way to fix your code. After this, -fixing Python 2 support will be easier. - -To avoid confusion, we recommend using ``unicode_literals`` everywhere -across a code-base or not at all, instead of turning on for only some -modules. - -This section summarizes the benefits and drawbacks of using -``unicode_literals``. +.. include:: standard_library_imports.rst -Benefits --------- - -1. String literals are unicode on Python 3. Making them unicode on Python 2 - leads to more consistency of your string types the two runtimes. This can - make it easier to understand and debug your code. - -2. Code without ``u''`` prefixes is cleaner, one of the claimed advantages - of Python 3. 
Even though some unicode strings would require a function - call to invert them to native strings for some Python 2 APIs (see - :ref:`stdlib-incompatibilities`), the incidence of these function calls - would be much lower than with using ``u''`` prefixes in the absence of - ``unicode_literals``. - -3. The diff for a Python 2 -> 2/3 port may be smaller, less noisy, and - easier to review with ``unicode_literals`` than if an explicit ``u''`` - prefix is added to every unadorned string literal. - -4. If support for Python 3.2 is required (e.g. for Ubuntu 12.04 LTS or - Debian wheezy), ``u''`` prefixes are a ``SyntaxError``, making - ``unicode_literals`` the only option for a Python 2/3 compatible - codebase. - - -Drawbacks ---------- - -1. Adding ``unicode_literals`` to a module amounts to a "global flag day" for - that module, changing the data types of all strings in the module at once. - Cautious developers may prefer an incremental approach. (See - `here `_ for an excellent article - describing the superiority of an incremental patch-set in the the case - of the Linux kernel.) - -.. This is a larger-scale change than adding explicit ``u''`` prefixes to -.. all strings that should be Unicode. - -2. Changing to ``unicode_literals`` will likely introduce regressions on - Python 2 that require an initial investment of time to find and fix. The - APIs may be changed in subtle ways that are not immediately obvious. - - An example on Python 2:: - - ### Module: mypaths.py - - ... - def unix_style_path(path): - return path.replace('\\', '/') - ... - - ### User code: - - >>> path1 = '\\Users\\Ed' - >>> unix_style_path(path1) - u'/Users/ed' - - On Python 2, adding a ``unicode_literals`` import to ``mypaths.py`` would - change the return type of the ``unix_style_path`` function from ``str`` to - ``unicode``, which is difficult to anticipate and probably unintended. 
- - The counterargument is that this code is broken, in a portability - sense; we see this from Python 3 raising a ``TypeError`` upon passing the - function a byte-string. The code needs to be changed to make explicit - whether the ``path`` argument is to be a byte string or a unicode string. - -3. With ``unicode_literals`` in effect, there is no way to specify a native - string literal (``str`` type on both platforms). This can be worked around as follows:: - - >>> from __future__ import unicode_literals - >>> ... - >>> from future.utils import bytes_to_native_str as n - - >>> s = n(b'ABCD') - >>> s - 'ABCD' # on both Py2 and Py3 - - although this incurs a performance penalty (a function call and, on Py3, - a ``decode`` method call.) - - This is a little awkward because various Python library APIs (standard - and non-standard) require a native string to be passed on both Py2 - and Py3. (See :ref:`stdlib-incompatibilities` for some examples. WSGI - dictionaries are another.) - -3. If a codebase already explicitly marks up all text with ``u''`` prefixes, - and if support for Python versions 3.0-3.2 can be dropped, then - removing the existing ``u''`` prefixes and replacing these with - ``unicode_literals`` imports (the porting approach Django used) would - introduce more noise into the patch and make it more difficult to review. - However, note that the ``futurize`` script takes advantage of PEP 414 and - does not remove explicit ``u''`` prefixes that already exist. - -4. Turning on ``unicode_literals`` converts even docstrings to unicode, but - Pydoc breaks with unicode docstrings containing non-ASCII characters for - Python versions < 2.7.7. (Fix committed in Jan 2014.):: - - >>> def f(): - ... 
u"Author: Martin von Löwis" - - >>> help(f) - - /Users/schofield/Install/anaconda/python.app/Contents/lib/python2.7/pydoc.pyc in pipepager(text, cmd) - 1376 pipe = os.popen(cmd, 'w') - 1377 try: - -> 1378 pipe.write(text) - 1379 pipe.close() - 1380 except IOError: - - UnicodeEncodeError: 'ascii' codec can't encode character u'\xf6' in position 71: ordinal not in range(128) - -See `this Stack Overflow thread -`_ -for other gotchas. - - -Others' perspectives --------------------- - -In favour of ``unicode_literals`` -********************************* - -The following `quote `_ is from Aymeric Augustin on 23 August 2012 regarding -why he chose ``unicode_literals`` for the port of Django to a Python -2/3-compatible codebase.: - - "... I'd like to explain why this PEP [PEP 414, which allows explicit - ``u''`` prefixes for unicode literals on Python 3.3+] is at odds with - the porting philosophy I've applied to Django, and why I would have - vetoed taking advantage of it. - - "I believe that aiming for a Python 2 codebase with Python 3 - compatibility hacks is a counter-productive way to port a project. You - end up with all the drawbacks of Python 2 (including the legacy `u` - prefixes) and none of the advantages Python 3 (especially the sane - string handling). - - "Working to write Python 3 code, with legacy compatibility for Python - 2, is much more rewarding. Of course it takes more effort, but the - results are much cleaner and much more maintainable. It's really about - looking towards the future or towards the past. - - "I understand the reasons why PEP 414 was proposed and why it was - accepted. It makes sense for legacy software that is minimally - maintained. I hope nobody puts Django in this category!" - - -Against ``unicode_literals`` -**************************** - - "There are so many subtle problems that ``unicode_literals`` causes. 
- For instance lots of people accidentally introduce unicode into - filenames and that seems to work, until they are using it on a system - where there are unicode characters in the filesystem path." - - -- Armin Ronacher - - "+1 from me for avoiding the unicode_literals future, as it can have - very strange side effects in Python 2.... This is one of the key - reasons I backed Armin's PEP 414." - - -- Nick Coghlan - - "Yeah, one of the nuisances of the WSGI spec is that the header values - IIRC are the str or StringType on both py2 and py3. With - unicode_literals this causes hard-to-spot bugs, as some WSGI servers - might be more tolerant than others, but usually using unicode in python - 2 for WSGI headers will cause the response to fail." - - -- Antti Haapala +.. include:: translation.rst +.. include:: unicode_literals.rst +Next steps +---------- +See :ref:`what-else`. diff --git a/docs/index.rst b/docs/index.rst index 9f8f6e2e..cc84c9b7 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,10 +1,9 @@ -future: clean single-source support for Python 2/3 -====================================================== +Easy, clean, reliable Python 2/3 compatibility +============================================== -``future`` is the missing compatibility layer between Python 2 and Python -3. It allows you to use a single, clean Python 3.x-compatible +``python-future`` is the missing compatibility layer between Python 2 and +Python 3. It allows you to use a single, clean Python 3.x-compatible codebase to support both Python 2 and Python 3 with minimal overhead. .. include:: contents.rst.inc - diff --git a/docs/int_object.rst b/docs/int_object.rst index e0f8a8bd..f774784b 100644 --- a/docs/int_object.rst +++ b/docs/int_object.rst @@ -23,7 +23,7 @@ is a subclass of Python 2's ``long`` with the same representation behaviour as Python 3's ``int``. 
To ensure an integer is long compatibly with both Py3 and Py2, cast it like this:: - >>> from future.builtins import int + >>> from builtins import int >>> must_be_a_long_integer = int(1234) The backported ``int`` object helps with writing doctests and simplifies code @@ -31,6 +31,8 @@ that deals with ``long`` and ``int`` as special cases on Py2. An example is the following code from ``xlwt-future`` (called by the ``xlwt.antlr.BitSet`` class) for writing out Excel ``.xls`` spreadsheets. With ``future``, the code is:: + from builtins import int + def longify(data): """ Turns data (an int or long, or a list of ints or longs) into a @@ -64,9 +66,3 @@ Without ``future`` (or with ``future`` < 0.7), this might be:: return list(map(int, data)) # same as returning data, but with up-front typechecking else: return list(map(long, data)) - - -Note that ``future.builtins`` defines :func:`isinstance` specially to handle -Python 2's short integers as well as the backported Py3-like ``int``. See -:ref:`isinstance-calls`. - diff --git a/docs/isinstance.rst b/docs/isinstance.rst index ce3cee64..2bb5084a 100644 --- a/docs/isinstance.rst +++ b/docs/isinstance.rst @@ -4,7 +4,7 @@ isinstance ---------- The following tests all pass on Python 3:: - + >>> assert isinstance(2**62, int) >>> assert isinstance(2**63, int) >>> assert isinstance(b'my byte-string', bytes) @@ -39,7 +39,7 @@ then the fifth test fails too:: After importing the builtins from ``future``, all these tests pass on Python 2 as on Python 3:: - >>> from future.builtins import bytes, int, str + >>> from builtins import bytes, int, str >>> assert isinstance(10, int) >>> assert isinstance(10**100, int) @@ -52,7 +52,7 @@ However, note that the last test requires that ``unicode_literals`` be imported >>> assert isinstance('unicode string 2', str) This works because the backported types ``int``, ``bytes`` and ``str`` -have metaclasses that override ``__instancecheck__``. 
See `PEP 3119 +(and others) have metaclasses that override ``__instancecheck__``. See `PEP 3119 `_ for details. @@ -60,8 +60,8 @@ for details. Passing data to/from Python 2 libraries --------------------------------------- -If you are passing any of the backported types (``bytes``, ``str``, -``int``) into brittle library code that performs type-checks using ``type()``, +If you are passing any of the backported types (``bytes``, ``int``, ``dict``, +``str``) into brittle library code that performs type-checks using ``type()``, rather than ``isinstance()``, or requires that you pass Python 2's native types (rather than subclasses) for some other reason, it may be necessary to upcast the types from ``future`` to their native superclasses on Py2. @@ -69,30 +69,30 @@ the types from ``future`` to their native superclasses on Py2. The ``native`` function in ``future.utils`` is provided for this. Here is how to use it. (The output showing is from Py2):: - >>> from future.builtins import * + >>> from builtins import int, bytes, str >>> from future.utils import native >>> a = int(10**20) # Py3-like long int >>> a 100000000000000000000 >>> type(a) - future.builtins.backports.newint.newint + future.types.newint.newint >>> native(a) 100000000000000000000L >>> type(native(a)) long - + >>> b = bytes(b'ABC') >>> type(b) - future.builtins.backports.newbytes.newbytes + future.types.newbytes.newbytes >>> native(b) 'ABC' >>> type(native(b)) str - + >>> s = str(u'ABC') >>> type(s) - future.builtins.backports.newstr.newstr + future.types.newstr.newstr >>> native(s) u'ABC' >>> type(native(s)) @@ -115,4 +115,3 @@ The objects ``native_str`` and ``native_bytes`` are available in The functions ``native_str_to_bytes`` and ``bytes_to_native_str`` are also available for more explicit conversions. 
- diff --git a/docs/limitations.rst b/docs/limitations.rst index c822a27f..0d13805d 100644 --- a/docs/limitations.rst +++ b/docs/limitations.rst @@ -1,4 +1,3 @@ - limitations of the ``future`` module and differences between Py2 and Py3 that are not (yet) handled =================================================================================================== @@ -39,7 +38,7 @@ Also: b'\x00'[0] != 0 b'\x01'[0] != 1 - + ``futurize`` does not yet wrap all byte-string literals in a ``bytes()`` call. This is on the to-do list. See :ref:`bytes-object` for more information. @@ -47,9 +46,7 @@ Also: Notes ----- - Ensure you are using new-style classes on Py2. Py3 doesn't require - inheritance from ``object`` for this, but Py2 does. ``futurize - --from3`` adds this back in automatically, but ensure you do this too + inheritance from ``object`` for this, but Py2 does. ``pasteurize`` + adds this back in automatically, but ensure you do this too when writing your classes, otherwise weird breakage when e.g. calling ``super()`` may occur. - - diff --git a/docs/metaclasses.rst b/docs/metaclasses.rst index c4bcdd00..d40c5a46 100644 --- a/docs/metaclasses.rst +++ b/docs/metaclasses.rst @@ -5,16 +5,14 @@ Python 3 and Python 2 syntax for metaclasses are incompatible. ``future`` provides a function (from ``jinja2/_compat.py``) called :func:`with_metaclass` that can assist with specifying metaclasses portably across Py3 and Py2. 
Use it like this:: - + from future.utils import with_metaclass class BaseForm(object): pass - + class FormType(type): pass - + class Form(with_metaclass(FormType, BaseForm)): pass - - diff --git a/docs/notebooks/Writing Python 2-3 compatible code.ipynb b/docs/notebooks/Writing Python 2-3 compatible code.ipynb new file mode 100644 index 00000000..663ede44 --- /dev/null +++ b/docs/notebooks/Writing Python 2-3 compatible code.ipynb @@ -0,0 +1,3167 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cheat Sheet: Writing Python 2-3 compatible code" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- **Copyright (c):** 2013-2024 Python Charmers, Australia.\n", + "- **Author:** Ed Schofield.\n", + "- **Licence:** Creative Commons Attribution.\n", + "\n", + "A PDF version is here: https://python-future.org/compatible_idioms.pdf\n", + "\n", + "This notebook shows you idioms for writing future-proof code that is compatible with both versions of Python: 2 and 3. It accompanies Ed Schofield's talk at PyCon AU 2014, \"Writing 2/3 compatible code\". (The video is here: .)\n", + "\n", + "Minimum versions:\n", + "\n", + " - Python 2: 2.6+\n", + " - Python 3: 3.3+" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The imports below refer to these ``pip``-installable packages on PyPI:\n", + "\n", + " import future # pip install future\n", + " import builtins # pip install future\n", + " import past # pip install future\n", + " import six # pip install six\n", + "\n", + "The following scripts are also ``pip``-installable:\n", + "\n", + " futurize # pip install future\n", + " pasteurize # pip install future\n", + "\n", + "See https://python-future.org and https://pythonhosted.org/six/ for more information." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Essential syntax differences" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### print" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "print 'Hello'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3:\n", + "print('Hello')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To print multiple strings, import ``print_function`` to prevent Py2 from interpreting it as a tuple:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "print 'Hello', 'Guido'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3:\n", + "from __future__ import print_function # (at top of module)\n", + "\n", + "print('Hello', 'Guido')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "print >> sys.stderr, 'Hello'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3:\n", + "from __future__ import print_function\n", + "\n", + "print('Hello', file=sys.stderr)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "print 'Hello'," + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3:\n", + "from __future__ import 
print_function\n", + "\n", + "print('Hello', end='')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Raising exceptions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "raise ValueError, \"dodgy value\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3:\n", + "raise ValueError(\"dodgy value\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Raising exceptions with a traceback:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "traceback = sys.exc_info()[2]\n", + "raise ValueError, \"dodgy value\", traceback" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 3 only:\n", + "raise ValueError(\"dodgy value\").with_traceback()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 1\n", + "from six import reraise as raise_\n", + "# or\n", + "from future.utils import raise_\n", + "\n", + "traceback = sys.exc_info()[2]\n", + "raise_(ValueError, \"dodgy value\", traceback)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 2\n", + "from future.utils import raise_with_traceback\n", + "\n", + "raise_with_traceback(ValueError(\"dodgy value\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Exception chaining (PEP 3134):" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + 
"source": [ + "# Setup:\n", + "class DatabaseError(Exception):\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 3 only\n", + "class FileDatabase:\n", + " def __init__(self, filename):\n", + " try:\n", + " self.file = open(filename)\n", + " except IOError as exc:\n", + " raise DatabaseError('failed to open') from exc" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3:\n", + "from future.utils import raise_from\n", + "\n", + "class FileDatabase:\n", + " def __init__(self, filename):\n", + " try:\n", + " self.file = open(filename)\n", + " except IOError as exc:\n", + " raise_from(DatabaseError('failed to open'), exc)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Testing the above:\n", + "try:\n", + " fd = FileDatabase('non_existent_file.txt')\n", + "except Exception as e:\n", + " assert isinstance(e.__cause__, IOError) # FileNotFoundError on Py3.3+ inherits from IOError" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Catching exceptions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "try:\n", + " ...\n", + "except ValueError, e:\n", + " ..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3:\n", + "try:\n", + " ...\n", + "except ValueError as e:\n", + " ..." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Division" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Integer division (rounding down):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "assert 2 / 3 == 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3:\n", + "assert 2 // 3 == 0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"True division\" (float division):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 3 only:\n", + "assert 3 / 2 == 1.5" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3:\n", + "from __future__ import division # (at top of module)\n", + "\n", + "assert 3 / 2 == 1.5" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"Old division\" (i.e. compatible with Py2 behaviour):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "a = b / c # with any types" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3:\n", + "from past.utils import old_div\n", + "\n", + "a = old_div(b, c) # always same as / on Py2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Long integers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Short integers are gone in Python 3 and ``long`` has become ``int`` (without the trailing ``L`` in the ``repr``)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only\n", + "k = 9223372036854775808L\n", + "\n", + "# Python 2 and 3:\n", + "k = 9223372036854775808" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only\n", + "bigint = 1L\n", + "\n", + "# Python 2 and 3\n", + "from builtins import int\n", + "bigint = int(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To test whether a value is an integer (of any kind):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "if isinstance(x, (int, long)):\n", + " ...\n", + "\n", + "# Python 3 only:\n", + "if isinstance(x, int):\n", + " ...\n", + "\n", + "# Python 2 and 3: option 1\n", + "from builtins import int # subclass of long on Py2\n", + "\n", + "if isinstance(x, int): # matches both int and long on Py2\n", + " ...\n", + "\n", + "# Python 2 and 3: option 2\n", + "from past.builtins import long\n", + "\n", + "if isinstance(x, (int, long)):\n", + " ..." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Octal constants" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "0644 # Python 2 only" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "0o644 # Python 2 and 3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Backtick repr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "`x` # Python 2 only" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "repr(x) # Python 2 and 3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Metaclasses" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "class BaseForm(object):\n", + " pass\n", + "\n", + "class FormType(type):\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "class Form(BaseForm):\n", + " __metaclass__ = FormType\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 3 only:\n", + "class Form(BaseForm, metaclass=FormType):\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3:\n", + "from six import with_metaclass\n", + "# or\n", + "from future.utils import with_metaclass\n", + "\n", + "class Form(with_metaclass(FormType, BaseForm)):\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "## Strings and bytes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Unicode (text) string literals" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you are upgrading an existing Python 2 codebase, it may be preferable to mark up all string literals as unicode explicitly with ``u`` prefixes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only\n", + "s1 = 'The Zen of Python'\n", + "s2 = u'きたないのよりきれいな方がいい\\n'\n", + "\n", + "# Python 2 and 3\n", + "s1 = u'The Zen of Python'\n", + "s2 = u'きたないのよりきれいな方がいい\\n'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The ``futurize`` and ``python-modernize`` tools do not currently offer an option to do this automatically." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you are writing code for a new project or new codebase, you can use this idiom to make all string literals in a module unicode strings:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3\n", + "from __future__ import unicode_literals # at top of module\n", + "\n", + "s1 = 'The Zen of Python'\n", + "s2 = 'きたないのよりきれいな方がいい\\n'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "See https://python-future.org/unicode_literals.html for more discussion on which style to use." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Byte-string literals" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only\n", + "s = 'This must be a byte-string'\n", + "\n", + "# Python 2 and 3\n", + "s = b'This must be a byte-string'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To loop over a byte-string with possible high-bit characters, obtaining each character as a byte-string of length 1:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "for bytechar in 'byte-string with high-bit chars like \\xf9':\n", + " ...\n", + "\n", + "# Python 3 only:\n", + "for myint in b'byte-string with high-bit chars like \\xf9':\n", + " bytechar = bytes([myint])\n", + "\n", + "# Python 2 and 3:\n", + "from builtins import bytes\n", + "for myint in bytes(b'byte-string with high-bit chars like \\xf9'):\n", + " bytechar = bytes([myint])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As an alternative, ``chr()`` and ``.encode('latin-1')`` can be used to convert an int into a 1-char byte string:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 3 only:\n", + "for myint in b'byte-string with high-bit chars like \\xf9':\n", + " char = chr(myint) # returns a unicode string\n", + " bytechar = char.encode('latin-1')\n", + "\n", + "# Python 2 and 3:\n", + "from builtins import bytes, chr\n", + "for myint in bytes(b'byte-string with high-bit chars like \\xf9'):\n", + " char = chr(myint) # returns a unicode string\n", + " bytechar = char.encode('latin-1') # forces returning a byte str" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### basestring" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "a = u'abc'\n", + "b = 'def'\n", + "assert (isinstance(a, basestring) and isinstance(b, basestring))\n", + "\n", + "# Python 2 and 3: alternative 1\n", + "from past.builtins import basestring # pip install future\n", + "\n", + "a = u'abc'\n", + "b = b'def'\n", + "assert (isinstance(a, basestring) and isinstance(b, basestring))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: alternative 2: refactor the code to avoid considering\n", + "# byte-strings as strings.\n", + "\n", + "from builtins import str\n", + "a = u'abc'\n", + "b = b'def'\n", + "c = b.decode()\n", + "assert isinstance(a, str) and isinstance(c, str)\n", + "# ..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### unicode" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "templates = [u\"blog/blog_post_detail_%s.html\" % unicode(slug)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: alternative 1\n", + "from builtins import str\n", + "templates = [u\"blog/blog_post_detail_%s.html\" % str(slug)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: alternative 2\n", + "from builtins import str as text\n", + "templates = [u\"blog/blog_post_detail_%s.html\" % text(slug)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### StringIO" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 
only:\n", + "from StringIO import StringIO\n", + "# or:\n", + "from cStringIO import StringIO\n", + "\n", + "# Python 2 and 3:\n", + "from io import BytesIO # for handling byte strings\n", + "from io import StringIO # for handling unicode strings" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Imports relative to a package" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Suppose the package is:\n", + "\n", + " mypackage/\n", + " __init__.py\n", + " submodule1.py\n", + " submodule2.py\n", + " \n", + "and the code below is in ``submodule1.py``:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only: \n", + "import submodule2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3:\n", + "from . import submodule2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3:\n", + "# To make Py2 code safer (more like Py3) by preventing\n", + "# implicit relative imports, you can also add this to the top:\n", + "from __future__ import absolute_import" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dictionaries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "heights = {'Fred': 175, 'Anne': 166, 'Joe': 192}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Iterating through ``dict`` keys/values/items" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Iterable dict keys:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "for key in 
heights.iterkeys():\n", + " ..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3:\n", + "for key in heights:\n", + " ..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Iterable dict values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "for value in heights.itervalues():\n", + " ..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Idiomatic Python 3\n", + "for value in heights.values(): # extra memory overhead on Py2\n", + " ..." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 1\n", + "from builtins import dict\n", + "\n", + "heights = dict(Fred=175, Anne=166, Joe=192)\n", + "for key in heights.values(): # efficient on Py2 and Py3\n", + " ..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 2\n", + "from future.utils import itervalues\n", + "# or\n", + "from six import itervalues\n", + "\n", + "for key in itervalues(heights):\n", + " ..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Iterable dict items:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "for (key, value) in heights.iteritems():\n", + " ..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 1\n", + "for (key, value) in heights.items(): # inefficient on Py2 \n", + " ..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 2\n", + "from future.utils import viewitems\n", + "\n", + "for (key, value) in viewitems(heights): # also behaves like a set\n", + " ..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 3\n", + "from future.utils import iteritems\n", + "# or\n", + "from six import iteritems\n", + "\n", + "for (key, value) in iteritems(heights):\n", + " ..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### dict keys/values/items as a list" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "dict keys as a list:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "keylist = heights.keys()\n", + "assert isinstance(keylist, list)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3:\n", + "keylist = list(heights)\n", + "assert isinstance(keylist, list)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "dict values as a list:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "heights = {'Fred': 175, 'Anne': 166, 'Joe': 192}\n", + "valuelist = heights.values()\n", + "assert isinstance(valuelist, list)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 1\n", + "valuelist = list(heights.values()) # inefficient on Py2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + 
"collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 2\n", + "from builtins import dict\n", + "\n", + "heights = dict(Fred=175, Anne=166, Joe=192)\n", + "valuelist = list(heights.values())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 3\n", + "from future.utils import listvalues\n", + "\n", + "valuelist = listvalues(heights)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 4\n", + "from future.utils import itervalues\n", + "# or\n", + "from six import itervalues\n", + "\n", + "valuelist = list(itervalues(heights))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "dict items as a list:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 1\n", + "itemlist = list(heights.items()) # inefficient on Py2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 2\n", + "from future.utils import listitems\n", + "\n", + "itemlist = listitems(heights)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 3\n", + "from future.utils import iteritems\n", + "# or\n", + "from six import iteritems\n", + "\n", + "itemlist = list(iteritems(heights))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Custom class behaviour" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom iterators" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + 
"source": [ + "# Python 2 only\n", + "class Upper(object):\n", + " def __init__(self, iterable):\n", + " self._iter = iter(iterable)\n", + " def next(self): # Py2-style\n", + " return self._iter.next().upper()\n", + " def __iter__(self):\n", + " return self\n", + "\n", + "itr = Upper('hello')\n", + "assert itr.next() == 'H' # Py2-style\n", + "assert list(itr) == list('ELLO')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 1\n", + "from builtins import object\n", + "\n", + "class Upper(object):\n", + " def __init__(self, iterable):\n", + " self._iter = iter(iterable)\n", + " def __next__(self): # Py3-style iterator interface\n", + " return next(self._iter).upper() # builtin next() function calls\n", + " def __iter__(self):\n", + " return self\n", + "\n", + "itr = Upper('hello')\n", + "assert next(itr) == 'H' # compatible style\n", + "assert list(itr) == list('ELLO')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 2\n", + "from future.utils import implements_iterator\n", + "\n", + "@implements_iterator\n", + "class Upper(object):\n", + " def __init__(self, iterable):\n", + " self._iter = iter(iterable)\n", + " def __next__(self): # Py3-style iterator interface\n", + " return next(self._iter).upper() # builtin next() function calls\n", + " def __iter__(self):\n", + " return self\n", + "\n", + "itr = Upper('hello')\n", + "assert next(itr) == 'H'\n", + "assert list(itr) == list('ELLO')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom ``__str__`` methods" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "class MyClass(object):\n", + " def __unicode__(self):\n", + " return 'Unicode string: 
\\u5b54\\u5b50'\n", + " def __str__(self):\n", + " return unicode(self).encode('utf-8')\n", + "\n", + "a = MyClass()\n", + "print(a) # prints encoded string" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Unicode string: 孔子\n" + ] + } + ], + "source": [ + "# Python 2 and 3:\n", + "from future.utils import python_2_unicode_compatible\n", + "\n", + "@python_2_unicode_compatible\n", + "class MyClass(object):\n", + " def __str__(self):\n", + " return u'Unicode string: \\u5b54\\u5b50'\n", + "\n", + "a = MyClass()\n", + "print(a) # prints string encoded as utf-8 on Py2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom ``__nonzero__`` vs ``__bool__`` method:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "class AllOrNothing(object):\n", + " def __init__(self, l):\n", + " self.l = l\n", + " def __nonzero__(self):\n", + " return all(self.l)\n", + "\n", + "container = AllOrNothing([0, 100, 200])\n", + "assert not bool(container)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3:\n", + "from builtins import object\n", + "\n", + "class AllOrNothing(object):\n", + " def __init__(self, l):\n", + " self.l = l\n", + " def __bool__(self):\n", + " return all(self.l)\n", + "\n", + "container = AllOrNothing([0, 100, 200])\n", + "assert not bool(container)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Lists versus iterators" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### xrange" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 
only:\n", + "for i in xrange(10**8):\n", + " ..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: forward-compatible\n", + "from builtins import range\n", + "for i in range(10**8):\n", + " ..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: backward-compatible\n", + "from past.builtins import xrange\n", + "for i in xrange(10**8):\n", + " ..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only\n", + "mylist = range(5)\n", + "assert mylist == [0, 1, 2, 3, 4]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: forward-compatible: option 1\n", + "mylist = list(range(5)) # copies memory on Py2\n", + "assert mylist == [0, 1, 2, 3, 4]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: forward-compatible: option 2\n", + "from builtins import range\n", + "\n", + "mylist = list(range(5))\n", + "assert mylist == [0, 1, 2, 3, 4]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 3\n", + "from future.utils import lrange\n", + "\n", + "mylist = lrange(5)\n", + "assert mylist == [0, 1, 2, 3, 4]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: backward compatible\n", + "from past.builtins import range\n", + "\n", + "mylist = range(5)\n", + "assert 
mylist == [0, 1, 2, 3, 4]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### map" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "mynewlist = map(f, myoldlist)\n", + "assert mynewlist == [f(x) for x in myoldlist]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 1\n", + "# Idiomatic Py3, but inefficient on Py2\n", + "mynewlist = list(map(f, myoldlist))\n", + "assert mynewlist == [f(x) for x in myoldlist]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 2\n", + "from builtins import map\n", + "\n", + "mynewlist = list(map(f, myoldlist))\n", + "assert mynewlist == [f(x) for x in myoldlist]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 3\n", + "try:\n", + " from itertools import imap as map\n", + "except ImportError:\n", + " pass\n", + "\n", + "mynewlist = list(map(f, myoldlist)) # inefficient on Py2\n", + "assert mynewlist == [f(x) for x in myoldlist]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 4\n", + "from future.utils import lmap\n", + "\n", + "mynewlist = lmap(f, myoldlist)\n", + "assert mynewlist == [f(x) for x in myoldlist]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 5\n", + "from past.builtins import map\n", + "\n", + "mynewlist = map(f, myoldlist)\n", + "assert mynewlist == [f(x) for x in myoldlist]" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "### imap" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "from itertools import imap\n", + "\n", + "myiter = imap(func, myoldlist)\n", + "assert isinstance(myiter, iter)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 3 only:\n", + "myiter = map(func, myoldlist)\n", + "assert isinstance(myiter, iter)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 1\n", + "from builtins import map\n", + "\n", + "myiter = map(func, myoldlist)\n", + "assert isinstance(myiter, iter)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 2\n", + "try:\n", + " from itertools import imap as map\n", + "except ImportError:\n", + " pass\n", + "\n", + "myiter = map(func, myoldlist)\n", + "assert isinstance(myiter, iter)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### zip, izip" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As above with ``zip`` and ``itertools.izip``." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### filter, ifilter" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As above with ``filter`` and ``itertools.ifilter`` too." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Other builtins" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### File IO with open()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Python 2 only\n", + "f = open('myfile.txt')\n", + "data = f.read() # as a byte string\n", + "text = data.decode('utf-8')\n", + "\n", + "# Python 2 and 3: alternative 1\n", + "from io import open\n", + "f = open('myfile.txt', 'rb')\n", + "data = f.read() # as bytes\n", + "text = data.decode('utf-8') # unicode, not bytes\n", + "\n", + "# Python 2 and 3: alternative 2\n", + "from io import open\n", + "f = open('myfile.txt', encoding='utf-8')\n", + "text = f.read() # unicode, not bytes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### reduce()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "assert reduce(lambda x, y: x+y, [1, 2, 3, 4, 5]) == 1+2+3+4+5" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3:\n", + "from functools import reduce\n", + "\n", + "assert reduce(lambda x, y: x+y, [1, 2, 3, 4, 5]) == 1+2+3+4+5" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### raw_input()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "name = raw_input('What is your name? ')\n", + "assert isinstance(name, str) # native str" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3:\n", + "from builtins import input\n", + "\n", + "name = input('What is your name? 
')\n", + "assert isinstance(name, str) # native str on Py2 and Py3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### input()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "input(\"Type something safe please: \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3\n", + "from builtins import input\n", + "eval(input(\"Type something safe please: \"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Warning: using either of these is **unsafe** with untrusted input." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### file()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "f = file(pathname)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3:\n", + "f = open(pathname)\n", + "\n", + "# But preferably, use this:\n", + "from io import open\n", + "f = open(pathname, 'rb') # if f.read() should return bytes\n", + "# or\n", + "f = open(pathname, 'rt') # if f.read() should return unicode text" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### exec" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "exec 'x = 10'\n", + "\n", + "# Python 2 and 3:\n", + "exec('x = 10')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "g = globals()\n", + "exec 'x = 10' in g\n", + "\n", + "# Python 2 and 3:\n", + "g = globals()\n", 
+ "exec('x = 10', g)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "l = locals()\n", + "exec 'x = 10' in g, l\n", + "\n", + "# Python 2 and 3:\n", + "exec('x = 10', g, l)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "But note that Py3's `exec()` is less powerful (and less dangerous) than Py2's `exec` statement." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### execfile()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "execfile('myfile.py')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: alternative 1\n", + "from past.builtins import execfile\n", + "\n", + "execfile('myfile.py')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: alternative 2\n", + "exec(compile(open('myfile.py').read()))\n", + "\n", + "# This can sometimes cause this:\n", + "# SyntaxError: function ... 
uses import * and bare exec ...\n", + "# See https://github.com/PythonCharmers/python-future/issues/37" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### unichr()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "assert unichr(8364) == '€'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 3 only:\n", + "assert chr(8364) == '€'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3:\n", + "from builtins import chr\n", + "assert chr(8364) == '€'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### intern()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "intern('mystring')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 3 only:\n", + "from sys import intern\n", + "intern('mystring')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: alternative 1\n", + "from past.builtins import intern\n", + "intern('mystring')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: alternative 2\n", + "from six.moves import intern\n", + "intern('mystring')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: alternative 3\n", + "from future.standard_library import install_aliases\n", + 
"install_aliases()\n", + "from sys import intern\n", + "intern('mystring')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: alternative 2\n", + "try:\n", + " from sys import intern\n", + "except ImportError:\n", + " pass\n", + "intern('mystring')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### apply()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "args = ('a', 'b')\n", + "kwargs = {'kwarg1': True}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "apply(f, args, kwargs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: alternative 1\n", + "f(*args, **kwargs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: alternative 2\n", + "from past.builtins import apply\n", + "apply(f, args, kwargs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### chr()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "assert chr(64) == b'@'\n", + "assert chr(200) == b'\\xc8'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 3 only: option 1\n", + "assert chr(64).encode('latin-1') == b'@'\n", + "assert chr(0xc8).encode('latin-1') == b'\\xc8'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: 
option 1\n", + "from builtins import chr\n", + "\n", + "assert chr(64).encode('latin-1') == b'@'\n", + "assert chr(0xc8).encode('latin-1') == b'\\xc8'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 3 only: option 2\n", + "assert bytes([64]) == b'@'\n", + "assert bytes([0xc8]) == b'\\xc8'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: option 2\n", + "from builtins import bytes\n", + "\n", + "assert bytes([64]) == b'@'\n", + "assert bytes([0xc8]) == b'\\xc8'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### cmp()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "assert cmp('a', 'b') < 0 and cmp('b', 'a') > 0 and cmp('c', 'c') == 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: alternative 1\n", + "from past.builtins import cmp\n", + "assert cmp('a', 'b') < 0 and cmp('b', 'a') > 0 and cmp('c', 'c') == 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: alternative 2\n", + "cmp = lambda(x, y): (x > y) - (x < y)\n", + "assert cmp('a', 'b') < 0 and cmp('b', 'a') > 0 and cmp('c', 'c') == 0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### reload()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "reload(mymodule)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 
3\n", + "from imp import reload\n", + "reload(mymodule)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Standard library" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### dbm modules" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only\n", + "import anydbm\n", + "import whichdb\n", + "import dbm\n", + "import dumbdbm\n", + "import gdbm\n", + "\n", + "# Python 2 and 3: alternative 1\n", + "from future import standard_library\n", + "standard_library.install_aliases()\n", + "\n", + "import dbm\n", + "import dbm.ndbm\n", + "import dbm.dumb\n", + "import dbm.gnu\n", + "\n", + "# Python 2 and 3: alternative 2\n", + "from future.moves import dbm\n", + "from future.moves.dbm import dumb\n", + "from future.moves.dbm import ndbm\n", + "from future.moves.dbm import gnu\n", + "\n", + "# Python 2 and 3: alternative 3\n", + "from six.moves import dbm_gnu\n", + "# (others not supported)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### commands / subprocess modules" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only\n", + "from commands import getoutput, getstatusoutput\n", + "\n", + "# Python 2 and 3\n", + "from future import standard_library\n", + "standard_library.install_aliases()\n", + "\n", + "from subprocess import getoutput, getstatusoutput" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### subprocess.check_output()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2.7 and above\n", + "from subprocess import check_output\n", + "\n", + "# Python 2.6 and above: alternative 1\n", + "from future.moves.subprocess import check_output\n", + "\n", + "# Python 
2.6 and above: alternative 2\n", + "from future import standard_library\n", + "standard_library.install_aliases()\n", + "\n", + "from subprocess import check_output" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### collections: Counter, OrderedDict, ChainMap" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2.7 and above\n", + "from collections import Counter, OrderedDict, ChainMap\n", + "\n", + "# Python 2.6 and above: alternative 1\n", + "from future.backports import Counter, OrderedDict, ChainMap\n", + "\n", + "# Python 2.6 and above: alternative 2\n", + "from future import standard_library\n", + "standard_library.install_aliases()\n", + "\n", + "from collections import Counter, OrderedDict, ChainMap" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### StringIO module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only\n", + "from StringIO import StringIO\n", + "from cStringIO import StringIO" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3\n", + "from io import BytesIO\n", + "# and refactor StringIO() calls to BytesIO() if passing byte-strings" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### http module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "import httplib\n", + "import Cookie\n", + "import cookielib\n", + "import BaseHTTPServer\n", + "import SimpleHTTPServer\n", + "import CGIHttpServer\n", + "\n", + "# Python 2 and 3 (after ``pip install future``):\n", + "import http.client\n", + "import http.cookies\n", + "import http.cookiejar\n", + "import 
http.server" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### xmlrpc module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "import DocXMLRPCServer\n", + "import SimpleXMLRPCServer\n", + "\n", + "# Python 2 and 3 (after ``pip install future``):\n", + "import xmlrpc.server" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "import xmlrpclib\n", + "\n", + "# Python 2 and 3 (after ``pip install future``):\n", + "import xmlrpc.client" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### html escaping and entities" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3:\n", + "from cgi import escape\n", + "\n", + "# Safer (Python 2 and 3, after ``pip install future``):\n", + "from html import escape\n", + "\n", + "# Python 2 only:\n", + "from htmlentitydefs import codepoint2name, entitydefs, name2codepoint\n", + "\n", + "# Python 2 and 3 (after ``pip install future``):\n", + "from html.entities import codepoint2name, entitydefs, name2codepoint" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### html parsing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "from HTMLParser import HTMLParser\n", + "\n", + "# Python 2 and 3 (after ``pip install future``)\n", + "from html.parser import HTMLParser\n", + "\n", + "# Python 2 and 3 (alternative 2):\n", + "from future.moves.html.parser import HTMLParser" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### urllib module" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "``urllib`` is the hardest module to use from Python 2/3 compatible code. You may like to use Requests (https://python-requests.org) instead." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "from urlparse import urlparse\n", + "from urllib import urlencode\n", + "from urllib2 import urlopen, Request, HTTPError" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 3 only:\n", + "from urllib.parse import urlparse, urlencode\n", + "from urllib.request import urlopen, Request\n", + "from urllib.error import HTTPError" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: easiest option\n", + "from future.standard_library import install_aliases\n", + "install_aliases()\n", + "\n", + "from urllib.parse import urlparse, urlencode\n", + "from urllib.request import urlopen, Request\n", + "from urllib.error import HTTPError" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: alternative 2\n", + "from future.standard_library import hooks\n", + "\n", + "with hooks():\n", + " from urllib.parse import urlparse, urlencode\n", + " from urllib.request import urlopen, Request\n", + " from urllib.error import HTTPError" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: alternative 3\n", + "from future.moves.urllib.parse import urlparse, urlencode\n", + "from future.moves.urllib.request import urlopen, Request\n", + "from future.moves.urllib.error import HTTPError\n", + "# or\n", + "from six.moves.urllib.parse import urlparse, urlencode\n", + "from 
six.moves.urllib.request import urlopen\n", + "from six.moves.urllib.error import HTTPError" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 and 3: alternative 4\n", + "try:\n", + " from urllib.parse import urlparse, urlencode\n", + " from urllib.request import urlopen, Request\n", + " from urllib.error import HTTPError\n", + "except ImportError:\n", + " from urlparse import urlparse\n", + " from urllib import urlencode\n", + " from urllib2 import urlopen, Request, HTTPError" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Tkinter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "import Tkinter\n", + "import Dialog\n", + "import FileDialog\n", + "import ScrolledText\n", + "import SimpleDialog\n", + "import Tix \n", + "import Tkconstants\n", + "import Tkdnd \n", + "import tkColorChooser\n", + "import tkCommonDialog\n", + "import tkFileDialog\n", + "import tkFont\n", + "import tkMessageBox\n", + "import tkSimpleDialog\n", + "import ttk\n", + "\n", + "# Python 2 and 3 (after ``pip install future``):\n", + "import tkinter\n", + "import tkinter.dialog\n", + "import tkinter.filedialog\n", + "import tkinter.scrolledtext\n", + "import tkinter.simpledialog\n", + "import tkinter.tix\n", + "import tkinter.constants\n", + "import tkinter.dnd\n", + "import tkinter.colorchooser\n", + "import tkinter.commondialog\n", + "import tkinter.filedialog\n", + "import tkinter.font\n", + "import tkinter.messagebox\n", + "import tkinter.simpledialog\n", + "import tkinter.ttk" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### socketserver" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "import 
SocketServer\n", + "\n", + "# Python 2 and 3 (after ``pip install future``):\n", + "import socketserver" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### copy_reg, copyreg" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "import copy_reg\n", + "\n", + "# Python 2 and 3 (after ``pip install future``):\n", + "import copyreg" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### configparser" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "from ConfigParser import ConfigParser\n", + "\n", + "# Python 2 and 3 (after ``pip install future``):\n", + "from configparser import ConfigParser" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### queue" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "from Queue import Queue, heapq, deque\n", + "\n", + "# Python 2 and 3 (after ``pip install future``):\n", + "from queue import Queue, heapq, deque" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### repr, reprlib" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "from repr import aRepr, repr\n", + "\n", + "# Python 2 and 3 (after ``pip install future``):\n", + "from reprlib import aRepr, repr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### UserDict, UserList, UserString" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "from UserDict import UserDict\n", + "from UserList import 
UserList\n", + "from UserString import UserString\n", + "\n", + "# Python 3 only:\n", + "from collections import UserDict, UserList, UserString\n", + "\n", + "# Python 2 and 3: alternative 1\n", + "from future.moves.collections import UserDict, UserList, UserString\n", + "\n", + "# Python 2 and 3: alternative 2\n", + "from six.moves import UserDict, UserList, UserString\n", + "\n", + "# Python 2 and 3: alternative 3\n", + "from future.standard_library import install_aliases\n", + "install_aliases()\n", + "from collections import UserDict, UserList, UserString" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### itertools: filterfalse, zip_longest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Python 2 only:\n", + "from itertools import ifilterfalse, izip_longest\n", + "\n", + "# Python 3 only:\n", + "from itertools import filterfalse, zip_longest\n", + "\n", + "# Python 2 and 3: alternative 1\n", + "from future.moves.itertools import filterfalse, zip_longest\n", + "\n", + "# Python 2 and 3: alternative 2\n", + "from six.moves import filterfalse, zip_longest\n", + "\n", + "# Python 2 and 3: alternative 3\n", + "from future.standard_library import install_aliases\n", + "install_aliases()\n", + "from itertools import filterfalse, zip_longest" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/docs/notebooks/bytes object.ipynb b/docs/notebooks/bytes object.ipynb new file mode 100644 index 00000000..57921442 --- /dev/null +++ b/docs/notebooks/bytes object.ipynb @@ -0,0 
+1,161 @@ +{ + "metadata": { + "name": "" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import sys\n", + "sys.version" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 6, + "text": [ + "'2.7.6 (default, Mar 22 2014, 22:59:56) \\n[GCC 4.8.2]'" + ] + } + ], + "prompt_number": 6 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import future\n", + "future.__version__" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 1, + "text": [ + "'0.12.0-dev'" + ] + } + ], + "prompt_number": 1 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from builtins import bytes" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 2 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# Backported Py3 bytes object\n", + "b = bytes(b'ABCD')" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 3 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "list(b)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 4, + "text": [ + "[65, 66, 67, 68]" + ] + } + ], + "prompt_number": 4 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "repr(b)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 5, + "text": [ + "\"b'ABCD'\"" + ] + } + ], + "prompt_number": 5 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# These raise TypeErrors:\n", + "# b + u'EFGH'\n", + "# bytes(b',').join([u'Fred', u'Bill'])\n", + "# b < u'abcd'" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 10 + }, + { + 
"cell_type": "code", + "collapsed": false, + "input": [ + "b == u'ABCD'" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 9, + "text": [ + "False" + ] + } + ], + "prompt_number": 9 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [] + } + ], + "metadata": {} + } + ] +} diff --git a/docs/notebooks/object special methods (next, bool, ...).ipynb b/docs/notebooks/object special methods (next, bool, ...).ipynb new file mode 100644 index 00000000..7da31856 --- /dev/null +++ b/docs/notebooks/object special methods (next, bool, ...).ipynb @@ -0,0 +1,246 @@ +{ + "metadata": { + "name": "" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "``object`` special methods" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import sys\n", + "sys.version" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 1, + "text": [ + "'2.7.6 (default, Mar 22 2014, 22:59:56) \\n[GCC 4.8.2]'" + ] + } + ], + "prompt_number": 1 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from builtins import object" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 2 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "object??" 
+ ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 2 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# Py3-style iterators written as new-style classes (subclasses of\n", + "# future.builtins.object) are backward compatible with Py2:\n", + "class Upper(object):\n", + " def __init__(self, iterable):\n", + " self._iter = iter(iterable)\n", + " def __next__(self): # note the Py3 interface\n", + " return next(self._iter).upper()\n", + " def __iter__(self):\n", + " return self" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 3 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "assert list(Upper('hello')) == list('HELLO')" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 5 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "class AllOrNothing(object):\n", + " def __init__(self, l):\n", + " self.l = l\n", + " def __bool__(self):\n", + " return all(self.l)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 6 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "container = AllOrNothing([0, 100, 200])\n", + "bool(container)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 8, + "text": [ + "False" + ] + } + ], + "prompt_number": 8 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "container2 = AllOrNothing([-100, 100, 200])\n", + "bool(container2)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 9, + "text": [ + "True" + ] + } + ], + "prompt_number": 9 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Classes derived from Python builtins don't have this behaviour:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "class 
AllOrNothingBroken(list):\n", + " def __bool__(self):\n", + " print('Called!')\n", + " return all(self)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 13 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "container3 = AllOrNothingBroken([0, 1, 2])\n", + "bool(container3)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 14, + "text": [ + "True" + ] + } + ], + "prompt_number": 14 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "But subclasses of ``future`` types do:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from builtins import list\n", + "\n", + "class AllOrNothingFixed(list):\n", + " def __bool__(self):\n", + " print('Called!')\n", + " return all(self)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 15 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "container4 = AllOrNothingFixed([0, 1, 2])\n", + "bool(container4)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 17, + "text": [ + "True" + ] + } + ], + "prompt_number": 17 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [] + } + ], + "metadata": {} + } + ] +} diff --git a/docs/older_interfaces.rst b/docs/older_interfaces.rst new file mode 100644 index 00000000..546f92b9 --- /dev/null +++ b/docs/older_interfaces.rst @@ -0,0 +1,141 @@ +.. _older-standard-library-interfaces: + +Older interfaces +~~~~~~~~~~~~~~~~ + +In addition to the direct and ``install_aliases()`` interfaces (described in +:ref:`standard-library-imports`), ``future`` supports four other interfaces to +the reorganized standard library. This is largely for historical reasons (for +versions prior to 0.14). 
+ + +``future.moves`` interface +__________________________ + +The ``future.moves`` interface avoids import hooks. It may therefore be more +robust, at the cost of less idiomatic code. Use it as follows:: + + from future.moves import queue + from future.moves import socketserver + from future.moves.http.client import HTTPConnection + # etc. + +If you wish to achieve the effect of a two-level import such as this:: + + import http.client + +portably on both Python 2 and Python 3, note that Python currently does not +support syntax like this:: + + from future.moves import http.client + +One workaround is to replace the dot with an underscore:: + + import future.moves.http.client as http_client + + +Comparing future.moves and six.moves +++++++++++++++++++++++++++++++++++++ + +``future.moves`` and ``six.moves`` provide a similar Python 3-style +interface to the native standard library module definitions. + +The major difference is that the ``future.moves`` package is a real Python package +(``future/moves/__init__.py``) with real modules provided as ``.py`` files, whereas +``six.moves`` constructs fake ``_LazyModule`` module objects within the Python +code and injects them into the ``sys.modules`` cache. + +The advantage of ``six.moves`` is that the code fits in a single module that can be +copied into a project that seeks to eliminate external dependencies. + +The advantage of ``future.moves`` is that it is likely to be more robust in the +face of magic like Django's auto-reloader and tools like ``py2exe`` and +``cx_freeze``. See issues #51, #53, #56, and #63 in the ``six`` project for +more detail of bugs related to the ``six.moves`` approach. 
+
+
+``import_`` and ``from_import`` functions
+_________________________________________
+
+The functional interface is to use the ``import_`` and ``from_import``
+functions from ``future.standard_library`` as follows::
+
+    from future.standard_library import import_, from_import
+
+    http = import_('http.client')
+    urllib = import_('urllib.request')
+
+    urlopen, urlsplit = from_import('urllib.request', 'urlopen', 'urlsplit')
+
+This interface also works with two-level imports.
+
+
+Context-manager for import hooks
+________________________________
+
+The context-manager interface is via a context-manager called ``hooks``::
+
+    from future.standard_library import hooks
+    with hooks():
+        import socketserver
+        import queue
+        import configparser
+        import test.support
+        import html.parser
+        from collections import UserList
+        from itertools import filterfalse, zip_longest
+        from http.client import HTTPConnection
+        import urllib.request
+        # and other moved modules and definitions
+
+This interface is straightforward and effective, using PEP 302 import
+hooks. However, there are reports that this sometimes leads to problems
+(see issue #238). Until this is resolved, it is probably safer to use direct
+imports or one of the other import mechanisms listed above.
+
+
+install_hooks() call (deprecated)
+_________________________________
+
+The last interface to the reorganized standard library is via a call to
+``install_hooks()``::
+
+    from future import standard_library
+    standard_library.install_hooks()
+
+    import urllib
+    f = urllib.request.urlopen('http://www.python.org/')
+
+    standard_library.remove_hooks()
+
+If you use this interface, it is recommended to disable the import hooks again
+after use by calling ``remove_hooks()``, in order to prevent the futurized
+modules from being invoked inadvertently by other modules. (Python does not
+automatically disable import hooks at the end of a module, but keeps them
+active for the life of a process unless removed.)
+
+..
The call to ``scrub_future_sys_modules()`` removes any modules from the +.. ``sys.modules`` cache (on Py2 only) that have Py3-style names, like ``http.client``. +.. This can prevent libraries that have their own Py2/3 compatibility code from +.. importing the ``future.moves`` or ``future.backports`` modules unintentionally. +.. Code such as this will then fall through to using the Py2 standard library +.. modules on Py2:: +.. +.. try: +.. from http.client import HTTPConnection +.. except ImportError: +.. from httplib import HTTPConnection +.. +.. **Requests**: The above snippet is from the `requests +.. `_ library. As of v0.12, the +.. ``future.standard_library`` import hooks are compatible with Requests. + + +.. If you wish to avoid changing every reference of ``http.client`` to +.. ``http_client`` in your code, an alternative is this:: +.. +.. from future.standard_library import http +.. from future.standard_library.http import client as _client +.. http.client = client + +.. but it has the advantage that it can be used by automatic translation scripts such as ``futurize`` and ``pasteurize``. diff --git a/docs/open_function.rst b/docs/open_function.rst index a83c2d8f..7915d8a8 100644 --- a/docs/open_function.rst +++ b/docs/open_function.rst @@ -5,26 +5,26 @@ open() The Python 3 builtin :func:`open` function for opening files returns file contents as (unicode) strings unless the binary (``b``) flag is passed, as in:: - + open(filename, 'rb') in which case its methods like :func:`read` return Py3 :class:`bytes` objects. -``future.builtins`` provides an ``open`` function on Py2 that is mostly -compatible with that on Python 3 (e.g. it offers keyword arguments like -``encoding``). This maps to the ``open`` backport available in the standard -library :mod:`io` module on Py2.6 and Py2.7. +On Py2 with ``future`` installed, the :mod:`builtins` module provides an +``open`` function that is mostly compatible with that on Python 3 (e.g. 
it +offers keyword arguments like ``encoding``). This maps to the ``open`` backport +available in the standard library :mod:`io` module on Py2.7. One difference to be aware of between the Python 3 ``open`` and ``future.builtins.open`` on Python 2 is that the return types of methods such as :func:`read()` from the file object that ``open`` returns are not automatically cast from native bytes or unicode strings on Python 2 to the -appropriate ``future.builtins.bytes`` or ``future.builtins.str`` types. If you +corresponding ``future.builtins.bytes`` or ``future.builtins.str`` types. If you need the returned data to behave the exactly same way on Py2 as on Py3, you can cast it explicitly as follows:: from __future__ import unicode_literals - from future.builtins import * + from builtins import open, bytes data = open('image.png', 'rb').read() # On Py2, data is a standard 8-bit str with loose Unicode coercion. @@ -37,4 +37,3 @@ cast it explicitly as follows:: assert data[4] == 13 # integer # Raises TypeError: # data + u'' - diff --git a/docs/other/auto2to3.py b/docs/other/auto2to3.py new file mode 100644 index 00000000..1f56aa14 --- /dev/null +++ b/docs/other/auto2to3.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 +"""Wrapper to run 2to3 automatically at import time. + +Usage: + auto2to3 -m mypackage.main_module + auto2to3 mypackage/script.py + +By default, all modules imported from a subdirectory of the current +directory will be run through `2to3`. To change this behavior, use the +`--package` or `--dir` flags to `auto2to3` to specify which packages or +directories contain Python 2 code that should be converted. + +2to3 output is cached on disk between runs for speed. 
+ +Based on auto2to3.py by Georg Brandl: +http://dev.pocoo.org/hg/sandbox/file/tip/auto2to3.py +""" + +import argparse +import os +import sys +# imp was deprecated in python 3.6 +if sys.version_info >= (3, 6): + import importlib as imp +else: + import imp +import runpy +from io import StringIO +from pkgutil import ImpImporter, ImpLoader +import runpy +import sys +import tempfile + +import lib2to3 +from lib2to3.refactor import RefactoringTool, get_fixers_from_package + +fixes = get_fixers_from_package('lib2to3.fixes') +rt = RefactoringTool(fixes) + +PACKAGES = [] +DIRS = [] + +def maybe_2to3(filename, modname=None): + """Returns a python3 version of filename.""" + need_2to3 = False + filename = os.path.abspath(filename) + if any(filename.startswith(d) for d in DIRS): + need_2to3 = True + elif modname is not None and any(modname.startswith(p) for p in PACKAGES): + need_2to3 = True + if not need_2to3: + return filename + outfilename = '/_auto2to3_'.join(os.path.split(filename)) + if (not os.path.exists(outfilename) or + os.stat(filename).st_mtime > os.stat(outfilename).st_mtime): + try: + with open(filename) as file: + contents = file.read() + contents = rt.refactor_docstring(contents, filename) + tree = rt.refactor_string(contents, filename) + except Exception as err: + raise ImportError("2to3 couldn't convert %r" % filename) + outfile = open(outfilename, 'wb') + outfile.write(str(tree).encode('utf8')) + outfile.close() + return outfilename + + + +class ToThreeImporter(ImpImporter): + def find_module(self, fullname, path=None): + # this duplicates most of ImpImporter.find_module + subname = fullname.split(".")[-1] + if subname != fullname and self.path is None: + return None + if self.path is None: + path = None + else: + path = [os.path.realpath(self.path)] + try: + file, filename, etc = imp.find_module(subname, path) + except ImportError: + return None + if file and etc[2] == imp.PY_SOURCE: + outfilename = maybe_2to3(filename, modname=fullname) + if outfilename != 
filename: + file.close() + filename = outfilename + file = open(filename, 'rb') + return ImpLoader(fullname, file, filename, etc) + + +# setup the hook +sys.path_hooks.append(ToThreeImporter) +for key in sys.path_importer_cache: + if sys.path_importer_cache[key] is None: + sys.path_importer_cache[key] = ToThreeImporter(key) + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--package', action='append') + parser.add_argument('--dir', action='append') + parser.add_argument('-m', action='store', metavar='MODULE') + args, rest = parser.parse_known_args() + if args.package: + PACKAGES.extend(args.package) + if args.dir: + DIRS.extend(os.path.abspath(d) for d in args.dir) + if not PACKAGES and not DIRS: + DIRS.append(os.getcwd()) + if args.m: + sys.argv[1:] = rest + runpy.run_module(args.m, run_name='__main__', alter_sys=True) + elif rest: + sys.argv = rest + converted = maybe_2to3(rest[0]) + with open(converted) as f: + new_globals = dict(__name__='__main__', + __file__=rest[0]) + exec(f.read(), new_globals) + else: + import code + code.interact() + +if __name__ == '__main__': + main() diff --git a/docs/other/find_pattern.py b/docs/other/find_pattern.py index 679a1d64..1a5da35e 100644 --- a/docs/other/find_pattern.py +++ b/docs/other/find_pattern.py @@ -38,6 +38,7 @@ Larger snippets can be placed in a file (as opposed to a command-line arg) and processed with the -f option. 
""" +from __future__ import print_function __author__ = "Collin Winter " @@ -65,7 +66,7 @@ def main(args): elif len(args) > 1: tree = driver.parse_stream(StringIO(args[1] + "\n")) else: - print >>sys.stderr, "You must specify an input file or an input string" + print("You must specify an input file or an input string", file=sys.stderr) return 1 examine_tree(tree) @@ -75,10 +76,10 @@ def examine_tree(tree): for node in tree.post_order(): if isinstance(node, pytree.Leaf): continue - print repr(str(node)) + print(repr(str(node))) verdict = raw_input() if verdict.strip(): - print find_pattern(node) + print(find_pattern(node)) return def find_pattern(node): diff --git a/docs/other/fix_notebook_html_colour.py b/docs/other/fix_notebook_html_colour.py new file mode 100755 index 00000000..36c2205f --- /dev/null +++ b/docs/other/fix_notebook_html_colour.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +A script to re-enable colour in .html files produced from IPython notebooks. + +Based on a script in a GitHub gist with this copyright notice: + +#---------------------------------------------------------------------------- +# Copyright (c) 2013 - Damián Avila +# +# Distributed under the terms of the Modified BSD License. 
+# +# A little snippet to fix @media print issue printing slides from IPython +#----------------------------------------------------------------------------- +""" + +import io +import sys + +notebook = sys.argv[1] +assert notebook.endswith('.html') +# notebook = 'jevans.ipynb' +path = notebook[:-5] + '.html' +flag = u'@media print{*{text-shadow:none !important;color:#000 !important' + +with io.open(path, 'r') as in_file: + data = in_file.readlines() + for i, line in enumerate(data): + if line[:64] == flag: + data[i] = data[i].replace('color:#000 !important;', '') + +with io.open(path, 'w') as out_file: + out_file.writelines(data) + +print("You can now print your slides") diff --git a/docs/other/lessons.txt b/docs/other/lessons.txt index 5794f496..ede523cb 100644 --- a/docs/other/lessons.txt +++ b/docs/other/lessons.txt @@ -30,7 +30,7 @@ Python 2: Python 3: >>> array.array(b'b') TypeError: must be a unicode character, not bytes - + >>> array.array(u'b') array('b') @@ -47,5 +47,3 @@ Running test_bytes.py from Py3 on Py2 (after fixing imports) gives this: Ran 203 tests in 0.209s FAILED (failures=31, errors=55, skipped=1) - - diff --git a/docs/other/upload_future_docs.sh b/docs/other/upload_future_docs.sh index 09672b7b..04470f3f 100644 --- a/docs/other/upload_future_docs.sh +++ b/docs/other/upload_future_docs.sh @@ -1,21 +1,23 @@ -On the local machine --------------------- +# On the local machine -git checkout v0.7.0 +git checkout v0.16.0 # or whatever rm -Rf docs/build/ cd docs; make html +cp cheatsheet.pdf ~/shared/ cd build -touch ../../python-future-html-docs.zip -rm ../../python-future-html-docs.zip -zip -r ../../python-future-html-docs.zip * -scp ../../python-future-html-docs.zip python-future.org: -ssh python-future.org +touch ~/shared/python-future-html-docs.zip +rm ~/shared/python-future-html-docs.zip +zip -r ~/shared/python-future-html-docs.zip * +scp ~/shared/python-future-html-docs.zip ubuntu@python-future.org: +scp ~/shared/cheatsheet.pdf 
ubuntu@python-future.org: +ssh ubuntu@python-future.org -On the remote machine: ----------------------- -cd /var/www/python-future/html -unzip ~/python-future-html-docs.zip -chmod a+r * _static/* +# On the remote machine: +cd /var/www/python-future.org/ +unzip -o ~/python-future-html-docs.zip +chmod a+r * html/* html/_static/* +cp ~/cheatsheet.pdf ./html/compatible_idioms.pdf +cp ~/cheatsheet.pdf ./html/cheatsheet.pdf diff --git a/docs/other/useful_links.txt b/docs/other/useful_links.txt index 8dec2f9b..abb96849 100644 --- a/docs/other/useful_links.txt +++ b/docs/other/useful_links.txt @@ -23,7 +23,7 @@ http://lucumr.pocoo.org/2011/12/7/thoughts-on-python3/ http://python3porting.com/fixers.html http://washort.twistedmatrix.com/2010/11/unicode-in-python-and-how-to-prevent-it.html http://docs.python.org/release/3.0.1/whatsnew/3.0.html -https://pypi.python.org/pypi/unicode-nazi +https://pypi.org/project/unicode-nazi/ http://www.rmi.net/~lutz/strings30.html "Porting your code to Python 3": Alexandre Vassalotti: peadrop.com/slides/mp5.pdf @@ -43,7 +43,7 @@ python-modernize: https://github.com/mitsuhiko/python-modernize 2to3 docs describing the different fixers: http://docs.python.org/2/library/2to3.html -Injecting code into running Python processes (hopefully not needed): https://pypi.python.org/pypi/pyrasite/2.0 +Injecting code into running Python processes (hopefully not needed): https://pypi.org/project/pyrasite/2.0/ Withdrawn PEP to help with the Py3k standard library transition: http://www.peps.io/364/ @@ -52,7 +52,7 @@ Import hooks http://www.peps.io/302/ "Hacking Python imports ... 
for fun and profit": blog post from 2012-05: http://xion.org.pl/2012/05/06/hacking-python-imports/ -Full importlib backport to Py2: https://pypi.python.org/pypi/backport_importlib/0...1 +Full importlib backport to Py2: https://pypi.org/project/backport_importlib/0...1/ Python 2.7 importlib subset: http://docs.python.org/2/whatsnew/2.7.html#importlib-section @@ -78,7 +78,7 @@ PEPs: 358, 3112, 3137, 3138 http://python3porting.com/noconv.html#unicode-section Unicode literals u'...' back in Python 3.3: http://www.python.org/dev/peps/pep-0414/ https://github.com/django/django/blob/master/django/utils/encoding.py -https://pypi.python.org/pypi/unicode-nazi +https://pypi.org/project/unicode-nazi/ http://docs.python.org/3/library/stdtypes.html#bytes-methods http://wolfprojects.altervista.org/talks/unicode-and-python-3/ Buffer protocol (which bytes and bytes-like objects obey): http://docs.python.org/3.3/c-api/buffer.html#bufferobjects @@ -86,7 +86,7 @@ Buffer protocol (which bytes and bytes-like objects obey): http://docs.python.or Python's future ---------------- -https://ncoghlan_devs-python-notes.readthedocs.org/en/latest/python3/questions_and_answers.html +https://ncoghlan-devs-python-notes.readthedocs.io/en/latest/python3/questions_and_answers.html http://www.ironfroggy.com/software/i-am-worried-about-the-future-of-python @@ -104,8 +104,7 @@ Also: typecheck module on PyPI To categorize ------------- -https://pypi.python.org/pypi/awkwardduet/1.1a4 +https://pypi.org/project/awkwardduet/1.1a4/ https://github.com/campadrenalin/persei/blob/master/persei.py http://slideshare.net/dabeaz/mastering-python-3-io http://rmi.net/~lutz/strings30.html - diff --git a/docs/overview.rst b/docs/overview.rst index 692ac483..72a33558 100644 --- a/docs/overview.rst +++ b/docs/overview.rst @@ -1,105 +1 @@ -.. _overview: - -Overview -======== - -``future`` is the missing compatibility layer between Python 3 and Python -2. 
It allows you to maintain a single, clean Python 3.x-compatible -codebase with minimal cruft and run it easily on Python 2 mostly unchanged. - -``future`` comes with ``futurize``, a script that helps you to transition -to supporting both Python 2 and 3 in a single codebase, module by module. - -.. _features: - -Features --------- - -- provides backports and remappings for 16 builtins with different - semantics on Py3 versus Py2 -- provides backports and remappings from the Py3 standard library -- 300+ unit tests -- ``futurize`` script based on ``2to3``, ``3to2`` and parts of - ``python-modernize`` for automatic conversion from either Py2 or Py3 to a - clean single-source codebase compatible with Python 2.6+ and Python 3.3+. -- a consistent set of utility functions and decorators selected from - Py2/3 compatibility interfaces from projects like ``six``, ``IPython``, - ``Jinja2``, ``Django``, and ``Pandas``. - - -.. _code-examples: - -Code examples -------------- - -``future`` is designed to be imported at the top of each Python module -together with Python's built-in ``__future__`` module. 
For example, this -code behaves the same way on Python 2.6/2.7 after these imports as it does -on Python 3:: - - from __future__ import absolute_import, division, print_function - from future import bytes, str, open, super, zip, round, input, int - - # Backported Py3 bytes object - b = bytes(b'ABCD') - assert list(b) == [65, 66, 67, 68] - assert repr(b) == "b'ABCD'" - # These raise TypeErrors: - # b + u'EFGH' - # bytes(b',').join([u'Fred', u'Bill']) - - # Backported Py3 str object - s = str(u'ABCD') - assert s != bytes(b'ABCD') - assert isinstance(s.encode('utf-8'), bytes) - assert isinstance(b.decode('utf-8'), str) - assert repr(s) == 'ABCD' # consistent repr with Py3 (no u prefix) - # These raise TypeErrors: - # bytes(b'B') in s - # s.find(bytes(b'A')) - - # Extra arguments for the open() function - f = open('japanese.txt', encoding='utf-8', errors='replace') - - # New simpler super() function: - class VerboseList(list): - def append(self, item): - print('Adding an item') - super().append(item) - - # New iterable range object with slicing support - for i in range(10**15)[:10]: - pass - - # Other iterators: map, zip, filter - my_iter = zip(range(3), ['a', 'b', 'c']) - assert my_iter != list(my_iter) - - # The round() function behaves as it does in Python 3, using - # "Banker's Rounding" to the nearest even last digit: - assert round(0.1250, 2) == 0.12 - - # input() replaces Py2's raw_input() (with no eval()): - name = input('What is your name? 
') - print('Hello ' + name) - - # Compatible output from isinstance() across Py2/3: - assert isinstance(2**64, int) # long integers - assert isinstance(u'blah', str) - assert isinstance('blah', str) # if unicode_literals is in effect - -There is also support for renamed standard library modules in the form of a context manager that provides import hooks:: - - from future import standard_library - - with standard_library.enable_hooks(): - from http.client import HttpConnection - from itertools import filterfalse - import html.parser - import queue - - -Next steps ----------- -Check out the :ref:`quickstart-guide`. - +.. include:: ../README.rst diff --git a/docs/pasteurize.rst b/docs/pasteurize.rst new file mode 100644 index 00000000..070b5d1a --- /dev/null +++ b/docs/pasteurize.rst @@ -0,0 +1,45 @@ +.. _backwards-conversion: + +``pasteurize``: Py3 to Py2/3 +---------------------------- + +Running ``pasteurize -w mypy3module.py`` turns this Python 3 code:: + + import configparser + import copyreg + + class Blah: + pass + print('Hello', end=None) + +into this code which runs on both Py2 and Py3:: + + from __future__ import print_function + from future import standard_library + standard_library.install_hooks() + + import configparser + import copyreg + + class Blah(object): + pass + print('Hello', end=None) + +Notice that both ``futurize`` and ``pasteurize`` create explicit new-style +classes that inherit from ``object`` on both Python versions, and both +refer to stdlib modules (as well as builtins) under their Py3 names. + +Note also that the ``configparser`` module is a special case; there is a full +backport available on PyPI (https://pypi.org/project/configparser/), so, as +of v0.16.0, ``python-future`` no longer provides a ``configparser`` package +alias. To use the resulting code on Py2, install the ``configparser`` backport +with ``pip install configparser`` or by adding it to your ``requirements.txt`` +file. 
+ +``pasteurize`` also handles the following Python 3 features: + +- keyword-only arguments +- metaclasses (using :func:`~future.utils.with_metaclass`) +- extended tuple unpacking (PEP 3132) + +To handle function annotations (PEP 3107), see :ref:`func_annotations`. diff --git a/docs/porting.rst b/docs/porting.rst deleted file mode 100644 index 96bbab27..00000000 --- a/docs/porting.rst +++ /dev/null @@ -1,112 +0,0 @@ -.. _porting: - -Python 3 porting cheat-sheet -============================ - -Instructions and notes on porting code from Python 2 to both Python 3 and 2 using ``future``: - -.. _porting-setup: - -Step 0: setup -------------- - -Step 0 goal: set up and see the tests passing on Python 2 and failing on Python 3. - -a. Clone the package from github/bitbucket. Rename your repo to ``package-future``. Examples: ``reportlab-future``, ``paramiko-future``, ``mezzanine-future``. -b. Create and activate a Python 2 virtualenv. Install the package with ``python setup.py install`` and run its test suite on Py2.7 or Py2.6 (e.g. ``python setup.py test`` or ``py.test`` or ``nosetests``) -c. Optionally: if there’s a ``.travis.yml`` file, add Python version 3.3 and remove any versions < 2.6. -d. Install Python 3.3 with e.g. ``sudo apt-get install python3``. On other platforms, an easy way is to use Miniconda3. See `Miniconda3 `_. Then e.g.:: - - conda create -n py33 python=3.3 - -.. _porting-step1: - -Step 1: modern Py2 code ------------------------ - -The goal for this step is to modernize the Python 2 code without introducing any dependencies (on ``future`` or e.g. ``six``) at this stage. - - 1a. Install ``future`` into the virtualenv using:: - - pip install future - - 1b. Run ``futurize --stage1 -w *.py subdir1/*.py subdir2/*.py`` - - 1c. Commit all changes - - 1d. Re-run the test suite and fix any errors. - -See :ref:`forwards-conversion-stage1` for more info. 
- - -Example error -~~~~~~~~~~~~~ - -One relatively common error after conversion is:: - - Traceback (most recent call last): - ... - File "/home/user/Install/BleedingEdge/reportlab/tests/test_encrypt.py", line 19, in - from .test_pdfencryption import parsedoc - ValueError: Attempted relative import in non-package - -If you get this error, try adding an empty ``__init__.py`` file in the package -directory. (In this example, in the tests/ directory.) If this doesn’t help, -and if this message appears for all tests, they must be invoked differently -(from the cmd line or e.g. ``setup.py``). The way to run a module inside a -package on Python 3, or on Python 2 with ``absolute_import`` in effect, is:: - - python -m tests.test_platypus_xref - -(For more info, see `PEP 328 `_ and the `PEP 8 `_ section on absolute imports.) - - -.. _porting-step2: - -Step 2: working Py3 code that still supports Py2 ------------------------------------------------- - -The goal for this step is to get the tests passing first on Py3 and then on Py2 -again with the help of the ``future`` package. - -2a. Run:: - - futurize —-stage2 myfolder/*.py - -This adds this further import to each module:: - - from __future__ import unicode_literals - -All strings are then unicode (on Py2 as on Py3) unless explicitly marked with a ``b''`` prefix. - -It also makes other conversions needed to support both Python 2 and 3. These will likely -require additional imports from ``future``, such as:: - - from future import standard_library - from future.builtins import bytes - from future.builtins import open - -If you would like ``futurize`` to import all the changed builtins to have their Python 3 semantics on Python 2, invoke it like this:: - - futurize --stage2 --all-imports myfolder/*.py - - -2b. Re-run your tests on Py3 now. Make changes until your tests pass on Python 3. - -2c. Commit your changes! :) - -2d. Now run your tests on Python 2 and notice the errors. 
Add wrappers from ``future`` to re-enable Python 2 compatibility: - - - :func:`utils.reraise()` function for raising exceptions compatibly - - ``bytes(b'blah')`` instead of ``b'blah'`` - - ``str('my string')`` instead of ``'my string'`` if you need to enforce Py3’s strict type-checking on Py2 - - ``int(1234)`` instead of ``1234`` if you want to enforce a Py3-like long integer - - :func:`@utils.implements_iterator` decorator for any custom iterator class with a ``.__next__()`` method (which used to be ``.next()``) - - :func:`@utils.python_2_unicode_compatible` decorator for any class with a ``__str__`` method (which used to be ``__unicode__``). - - :func:`utils.with_metaclass` to define any metaclasses. - -See :ref:`what-else` for more info. - -After each change, re-run the tests on Py3 and Py2 to ensure they pass on both. - -2e. You’re done! Celebrate! Push your code and announce to the world! Hashtag #python-future diff --git a/docs/quickstart.rst b/docs/quickstart.rst index d9d6d847..8461a1a2 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -16,7 +16,8 @@ To install the latest stable version, type:: pip install future -If you would prefer the latest development version, it is available `here `_. +If you would prefer the latest development version, it is available `here +`_. If you are writing code from scratch @@ -26,32 +27,39 @@ The easiest way is to start each new module with these lines:: from __future__ import (absolute_import, division, print_function, unicode_literals) - from future.builtins import * + from builtins import * Then write standard Python 3 code. The :mod:`future` package will -provide support for running your code on Python 2.6 and 2.7 mostly unchanged. +provide support for running your code on Python 2.7, and 3.4+ mostly +unchanged. -See :ref:`what-else` for more details. +- For explicit import forms, see :ref:`explicit-imports`. +- For more details, see :ref:`what-else`. +- For a cheat sheet, see :ref:`compatible-idioms`. 
To convert existing Python 3 code --------------------------------- -To offer backward compatibility with Python 2, you can use the ``futurize`` -script with the ``--from3`` parameter. This adds these lines at the top of each +To offer backward compatibility with Python 2 from your Python 3 code, +you can use the ``pasteurize`` script. This adds these lines at the top of each module:: from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals - from future.builtins import open - from future.builtins import str + + from builtins import open + from builtins import str # etc., as needed - -and converts a few Python 3-only constructs to a form compatible with -both Py3 and Py2. Most remaining Python 3 code should simply work on -Python 2. + + from future import standard_library + standard_library.install_aliases() + +and converts several Python 3-only constructs (like keyword-only arguments) to a +form compatible with both Py3 and Py2. Most remaining Python 3 code should +simply work on Python 2. See :ref:`backwards-conversion` for more details. @@ -59,7 +67,9 @@ See :ref:`backwards-conversion` for more details. To convert existing Python 2 code --------------------------------- -Start with the :ref:`automatic-conversion` page. +.. include:: futurize_overview.rst + +See :ref:`forwards-conversion-stage1` and :ref:`forwards-conversion-stage2` for more details. .. If you already know Python 3, start with the :ref:`automatic-conversion` page. .. If you don't know Python 3 yet, start with :ref:`python3-essentials`. @@ -70,103 +80,72 @@ Start with the :ref:`automatic-conversion` page. 
Standard library reorganization ------------------------------- -:mod:`future` supports the standard library reorganization (PEP 3108) -via import hooks, allowing almost all moved standard library modules to -be accessed under their Python 3 names and locations in Python 2:: - +:mod:`future` supports the standard library reorganization (PEP 3108) via +one of several mechanisms, allowing most moved standard library modules +to be accessed under their Python 3 names and locations in Python 2:: + from future import standard_library - - with standard_library.enable_hooks(): - import socketserver - import queue - import configparser - import test.support - import html.parser - from collections import UserList - from itertools import filterfalse, zip_longest - from http.client import HttpConnection - # and other moved modules and definitions - -:mod:`future` also includes backports for these stdlib modules from Py3 -that were heavily refactored versus Py2:: - - with standard_library.enable_hooks(): - import html - import html.entities - import html.parser - - import http - import http.client - import http.server - -These modules are currently not supported, but we aim to support them in -the future:: - - with standard_library.enable_hooks(): - import http.cookies - import http.cookiejar - - import urllib - import urllib.parse - import urllib.request - import urllib.error - -If you need one of these, please open an issue `here -`_. + standard_library.install_aliases() -For more information on interfaces that have changed in the standard library -between Python 2 and Python 3, see :ref:`stdlib-incompatibilities`. + # Then these Py3-style imports work on both Python 2 and Python 3: + import socketserver + import queue + from collections import UserDict, UserList, UserString + from collections import ChainMap # even on Py2.7 + from itertools import filterfalse, zip_longest + import html + import html.entities + import html.parser -.. 
_utilities-guide: + import http + import http.client + import http.server + import http.cookies + import http.cookiejar -Utilities ---------- + import urllib.request + import urllib.parse + import urllib.response + import urllib.error + import urllib.robotparser -:mod:`future` also provides some useful functions and decorators to ease -backward compatibility with Py2 in the :mod:`future.utils` module. These -are a selection of the most useful functions from ``six`` and various -home-grown Py2/3 compatibility modules from popular Python projects, such as -Jinja2, Pandas, IPython, and Django. The goal is to consolidate these in one -place, tested and documented, obviating the need for every project to repeat -this work. + import xmlrpc.client + import xmlrpc.server -Examples:: +and others. For a complete list, see :ref:`direct-imports`. - # Functions like print() expect __str__ on Py2 to return a byte - # string. This decorator maps the __str__ to __unicode__ on Py2 and - # defines __str__ to encode it as utf-8: +.. _py2-dependencies: - from future.utils import python_2_unicode_compatible +Python 2-only dependencies +-------------------------- - @python_2_unicode_compatible - class MyClass(object): - def __str__(self): - return u'Unicode string: \u5b54\u5b50' - a = MyClass() +If you have dependencies that support only Python 2, you may be able to use the +``past`` module to automatically translate these Python 2 modules to Python 3 +upon import. First, install the Python 2-only package into your Python 3 +environment:: - # This then prints the Chinese characters for Confucius: - print(a) + $ pip3 install mypackagename --no-compile # to ignore SyntaxErrors +(or use ``pip`` if this points to your Py3 environment.) - # Iterators on Py3 require a __next__() method, whereas on Py2 this - # is called next(). 
This decorator allows Py3-style iterators to work - # identically on Py2: +Then add the following code at the top of your (Py3 or Py2/3-compatible) +code:: - @implements_iterator - class Upper(object): - def __init__(self, iterable): - self._iter = iter(iterable) - def __next__(self): # note the Py3 interface - return next(self._iter).upper() - def __iter__(self): - return self + from past.translation import autotranslate + autotranslate(['mypackagename']) + import mypackagename - print(list(Upper('hello'))) - # prints ['H', 'E', 'L', 'L', 'O'] +This feature is experimental, and we would appreciate your feedback on +how well this works or doesn't work for you. Please file an issue `here +`_. -On Python 3 these decorators are no-ops. +For more information on the automatic translation feature, see :ref:`translation`. -For more information, see :ref:`what-else`. +Next steps +---------- +For more information about writing Py2/3-compatible code, see: +- :ref:`compatible-idioms` +- :ref:`what-else`. diff --git a/docs/reference.rst b/docs/reference.rst index ca41a900..d9ac5e12 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -1,10 +1,10 @@ -############### -Reference Guide -############### +API Reference (in progress) +*************************** -.. *NOTE: These are still a work in progress... We need to go through our -.. docstrings and make them sphinx-compliant, and figure out how to improve -.. formatting with the sphinx-bootstrap-theme plugin.* +**NOTE: This page is still a work in progress... We need to go through our +docstrings and make them sphinx-compliant, and figure out how to improve +formatting with the sphinx-bootstrap-theme plugin. Pull requests would be +very welcome.** .. contents:: @@ -12,11 +12,21 @@ Reference Guide :depth: 2 future.builtins Interface -============================ +========================= .. automodule:: future.builtins :members: +.. Docs are also in future-builtins.rst. Extract these and put them into the +.. 
relevant docstrings. + + +Backported types from Python 3 +============================== + +.. automodule:: future.types + :members: + future.standard_library Interface ================================= @@ -32,11 +42,40 @@ future.utils Interface :members: -Backported types -================ +past.builtins Interface +========================= + +.. automodule:: past.builtins + :members: + +.. Docs are also in future-builtins.rst. Extract these and put them into the +.. relevant docstrings. + + +Forward-ported types from Python 2 +================================== + +.. automodule:: past.types + :members: -.. autoclass:: future.builtins.backports.newbytes -.. autoclass:: future.builtins.backports.newstr -.. autoclass:: future.builtins.backports.newint +.. bytes +.. ----- +.. .. automodule:: future.types.newbytes +.. +.. dict +.. ----- +.. .. automodule:: future.types.newdict +.. +.. int +.. --- +.. .. automodule:: future.builtins.backports.newint +.. +.. range +.. ----- +.. .. automodule:: future.types.newrange +.. +.. str +.. --- +.. .. automodule:: future.types.newstr diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..265642f4 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,3 @@ +sphinx==3.2.1 +Pallets-Sphinx-Themes==2.2.1 +setuptools==70.0.0 diff --git a/docs/roadmap.rst b/docs/roadmap.rst index a759796f..c5020d5e 100644 --- a/docs/roadmap.rst +++ b/docs/roadmap.rst @@ -5,14 +5,17 @@ futurize script --------------- 1. "Safe" mode -- from Py2 to modern Py2 or Py3 to more-compatible Py3 + - Split the fixers into two categories: safe and bold - Safe is highly unlikely to break existing Py2 or Py3 support. The output of this still requires :mod:`future` imports. 
Examples: - - Compatible metaclass syntax on Py3 - - Explicit inheritance from object on Py3 - + + - Compatible metaclass syntax on Py3 + - Explicit inheritance from object on Py3 + - Bold might make assumptions about which strings on Py2 should be unicode strings and which should be bytestrings. + - We should also build up a database of which standard library interfaces on Py2 and Py3 accept unicode strings versus byte-strings, which have changed, and which haven't. @@ -34,11 +37,10 @@ Experimental: from future import new_metaclass_syntax from future import new_style_classes -- Maybe:: +- [Done] Maybe:: from future.builtins import str should import a custom str is a Py3 str-like object which inherits from unicode and removes the decode() method and has any other Py3-like behaviours (possibly stricter casting?) - diff --git a/docs/standard_library_imports.rst b/docs/standard_library_imports.rst new file mode 100644 index 00000000..c09e9e30 --- /dev/null +++ b/docs/standard_library_imports.rst @@ -0,0 +1,181 @@ +.. _standard-library-imports: + +Standard library imports +------------------------ + +:mod:`future` supports the standard library reorganization (PEP 3108) through +several mechanisms. + +.. _direct-imports: + +Direct imports +~~~~~~~~~~~~~~ + +As of version 0.14, the ``future`` package comes with top-level packages for +Python 2.x that provide access to the reorganized standard library modules +under their Python 3.x names. + +Direct imports are the preferred mechanism for accessing the renamed standard +library modules in Python 2/3 compatible code. For example, the following clean +Python 3 code runs unchanged on Python 2 after installing ``future``:: + + >>> # Alias for future.builtins on Py2: + >>> from builtins import str, open, range, dict + + >>> # Top-level packages with Py3 names provided on Py2: + >>> import queue + >>> import tkinter.dialog + >>> etc. 
+ +Notice that this code actually runs on Python 3 without the presence of the +``future`` package. + +Of the 44 modules that were refactored with PEP 3108 (standard library +reorganization), 29 are supported with direct imports in the above manner. The +complete list is here:: + + ### Renamed modules: + + import builtins + + import copyreg + + import html + import html.entities + import html.parser + + import http.client + import http.cookies + import http.cookiejar + import http.server + + import queue + + import reprlib + + import socketserver + + from tkinter import colorchooser + from tkinter import commondialog + from tkinter import constants + from tkinter import dialog + from tkinter import dnd + from tkinter import filedialog + from tkinter import font + from tkinter import messagebox + from tkinter import scrolledtext + from tkinter import simpledialog + from tkinter import tix + from tkinter import ttk + + import winreg # Windows only + + import xmlrpc.client + import xmlrpc.server + + import _dummy_thread + import _markupbase + import _thread + +Note that, as of v0.16.0, ``python-future`` no longer includes an alias for the +``configparser`` module because a full backport exists (see https://pypi.org/project/configparser/). + +.. _list-standard-library-refactored: + +Aliased imports +~~~~~~~~~~~~~~~ + +The following 14 modules were refactored or extended from Python 2.7 to 3.x +but were neither renamed in Py3.x nor were the new APIs backported to Py2.x. +This precludes compatibility interfaces that work out-of-the-box. 
Instead, the +``future`` package makes the Python 3.x APIs available on Python 2.x as +follows:: + + from future.standard_library import install_aliases + install_aliases() + + from collections import UserDict, UserList, UserString + + import urllib.parse + import urllib.request + import urllib.response + import urllib.robotparser + import urllib.error + + import dbm + import dbm.dumb + import dbm.gnu # requires Python dbm support + import dbm.ndbm # requires Python dbm support + + from itertools import filterfalse, zip_longest + + from subprocess import getoutput, getstatusoutput + + from sys import intern + + import test.support + + +The newly exposed ``urllib`` submodules are backports of those from Py3.x. +This means, for example, that ``urllib.parse.unquote()`` now exists and takes +an optional ``encoding`` argument on Py2.x as it does on Py3.x. + +**Limitation:** Note that the ``http``-based backports do not currently support +HTTPS (as of 2015-09-11) because the SSL support changed considerably in Python +3.x. 
If you need HTTPS support, please use this idiom for now:: + + from future.moves.urllib.request import urlopen + +Backports also exist of the following features from Python 3.4: + +- ``math.ceil`` returns an int on Py3 +- ``collections.ChainMap`` (for 2.7) +- ``reprlib.recursive_repr`` (for 2.7) + +These can then be imported on Python 2.7+ as follows:: + + from future.standard_library import install_aliases + install_aliases() + + from math import ceil # now returns an int + from collections import ChainMap + from reprlib import recursive_repr + + +External standard-library backports +----------------------------------- + +Backports of the following modules from the Python 3.x standard library are +available independently of the python-future project:: + + import enum # pip install enum34 + import singledispatch # pip install singledispatch + import pathlib # pip install pathlib + +A few modules from Python 3.4 are also available in the ``backports`` +package namespace after ``pip install backports.lzma`` etc.:: + + from backports import lzma + from backports import functools_lru_cache as lru_cache + + +Included full backports +----------------------- + +Alpha-quality full backports of the following modules from Python 3.3's +standard library to Python 2.x are also available in ``future.backports``:: + + http.client + http.server + html.entities + html.parser + urllib + xmlrpc.client + xmlrpc.server + +The goal for these modules, unlike the modules in the ``future.moves`` package +or top-level namespace, is to backport new functionality introduced in Python +3.3. + +If you need the full backport of one of these packages, please open an issue `here +`_. 
diff --git a/docs/stdlib_incompatibilities.rst b/docs/stdlib_incompatibilities.rst index 5da76d4e..e93f96ba 100644 --- a/docs/stdlib_incompatibilities.rst +++ b/docs/stdlib_incompatibilities.rst @@ -18,8 +18,7 @@ Here we will attempt to document these, together with known workarounds: ``base64``, ``decodebytes()`` function, :ref:`stdlib-base64-decodebytes` ``re``, ``ASCII`` mode, :ref:`stdlib-re-ASCII` -To contribute to this, please email the python-porting list or send a -pull request. See :ref:`contributing`. +To contribute to this list, please send a pull request. See :ref:`contributing`. .. _stdlib-array-constructor: @@ -33,14 +32,14 @@ platform string: unicode string on Python 3, byte string on Python 2. Python 2:: >>> array.array(b'b') array.array(b'b') - + >>> array.array(u'u') TypeError: must be char, not unicode Python 3:: >>> array.array(b'b') TypeError: must be a unicode character, not bytes - + >>> array.array(u'b') array('b') @@ -54,9 +53,12 @@ You can use the following code on both Python 3 and Python 2:: import array # ... - + a = array.array(bytes_to_native_str(b'b')) +This was `fixed in Python 2.7.11 +`_. +Since then, ``array.array()`` now also accepts unicode format typecode. .. _stdlib-array-read: @@ -67,9 +69,9 @@ This method has been removed in Py3. This crops up in e.g. porting ``http.client .. _stdlib-base64-decodebytes: -base64.decodebytes() --------------------- -The ``base64`` module on Py2 has no 'decodebytes'. +base64.decodebytes() and base64.encodebytes() +--------------------------------------------- +The ``base64`` module on Py2 has no ``decodebytes`` or ``encodebytes`` functions. .. _stdlib-re-ASCII: @@ -88,14 +90,16 @@ This enables 'ASCII mode' for regular expressions (see the docs `here struct.pack() ------------- -The :func:`struct.pack` function must take a native string as its format argument. For example:: +Before Python version 2.7.7, the :func:`struct.pack` function +required a native string as its format argument. 
For example:: >>> from __future__ import unicode_literals >>> from struct import pack - >>> pack('<4H2I', version, rec_type, build, year, file_hist_flags, ver_can_read) - -raises ``TypeError: Struct() argument 1 must be string, not unicode`` on Python -2. To work around this, pass the format string argument as e.g. -``future.utils.native('<4H2I')``. + >>> pack('<4H2I', version, rec_type, build, year, file_hist_flags, ver_can_read) +raised ``TypeError: Struct() argument 1 must be string, not unicode``. +This was `fixed in Python 2.7.7 +`_. +Since then, ``struct.pack()`` now also accepts unicode format +strings. diff --git a/docs/str_object.rst b/docs/str_object.rst index a2251cee..568b897a 100644 --- a/docs/str_object.rst +++ b/docs/str_object.rst @@ -14,15 +14,20 @@ There are also other differences, such as the ``repr`` of unicode strings in Py2 having a ``u'...'`` prefix, versus simply ``'...'``, and the removal of the :func:`str.decode` method in Py3. -:mod:`future` contains a backport of the :mod:`str` object from Python 3 which -inherits from the Python 2 :class:`unicode` class but has customizations to -improve compatibility with Python 3's :class:`str` object. You can use it as -follows:: +:mod:`future` contains a :class:`newstr` type that is a backport of the +:mod:`str` object from Python 3. This inherits from the Python 2 +:class:`unicode` class but has customizations to improve compatibility with +Python 3's :class:`str` object. You can use it as follows:: >>> from __future__ import unicode_literals - >>> from future.builtins import str + >>> from builtins import str -(On Py3, this simply imports the builtin :class:`str` object.) +On Py2, this gives us:: + + >>> str + future.types.newstr.newstr + +(On Py3, it is simply the usual builtin :class:`str` object.) 
Then, for example, the following code has the same effect on Py2 as on Py3:: @@ -44,14 +49,14 @@ Then, for example, the following code has the same effect on Py2 as on Py3:: TypeError: argument can't be Various other operations that mix strings and bytes or other types are -permitted on Py2 with the :class:`future.builtins.str` class even though they +permitted on Py2 with the :class:`newstr` class even though they are illegal with Python 3. For example:: >>> s2 = b'/' + str('ABCD') >>> s2 '/ABCD' >>> type(s2) - future.builtins.backports.newstr.newstr + future.types.newstr.newstr This is allowed for compatibility with parts of the Python 2 standard library and various third-party libraries that mix byte-strings and unicode @@ -62,16 +67,16 @@ they are unicode. (See ``posixpath.py``.) Another example is the .. For example, this is permissible on Py2:: -.. +.. .. >>> u'u' > 10 .. True -.. +.. .. >>> u'u' <= b'u' .. True -.. +.. .. On Py3, these raise TypeErrors. -In most other ways, these :class:`future.builtins.str` objects on Py2 have the +In most other ways, these :class:`builtins.str` objects on Py2 have the same behaviours as Python 3's :class:`str`:: >>> s = str('ABCD') @@ -79,21 +84,16 @@ same behaviours as Python 3's :class:`str`:: >>> assert list(s) == ['A', 'B', 'C', 'D'] >>> assert s.split('B') == ['A', 'CD'] -.. If you must ensure identical use of (unicode) strings across Py3 and Py2 in a -.. single-source codebase, you can wrap string literals in a :func:`~str` call, as -.. follows:: -.. -.. from __future__ import unicode_literals -.. from future.builtins import * -.. -.. # ... -.. -.. s = str('This absolutely must behave like a Py3 string') -.. -.. # ... -.. -.. Most of the time this is unnecessary, but the stricter type-checking of the -.. ``future.builtins.str`` object is useful for ensuring the same consistent -.. separation between unicode and byte strings on Py2 as on Py3. This is -.. important when writing protocol handlers, for example. 
+The :class:`str` type from :mod:`builtins` also provides support for the +``surrogateescape`` error handler on Python 2.x. Here is an example that works +identically on Python 2.x and 3.x:: + + >>> from builtins import str + >>> s = str(u'\udcff') + >>> s.encode('utf-8', 'surrogateescape') + b'\xff' + +This feature is in alpha. Please leave feedback `here +`_ about whether this +works for you. diff --git a/docs/translation.rst b/docs/translation.rst new file mode 100644 index 00000000..632c46b1 --- /dev/null +++ b/docs/translation.rst @@ -0,0 +1,112 @@ +.. _translation: + +Using Python 2-only dependencies on Python 3 +-------------------------------------------- + +The ``past`` module provides an experimental ``translation`` package to help +with importing and using old Python 2 modules in a Python 3 environment. + +This is implemented using PEP 414 import hooks together with fixers from +``lib2to3`` and ``libfuturize`` (included with ``python-future``) that +attempt to automatically translate Python 2 code to Python 3 code with equivalent +semantics upon import. + +*Note* This feature is still in alpha and needs further development to support a +full range of real-world Python 2 modules. Also be aware that the API for +this package might change considerably in later versions. + +Here is how to use it:: + + $ pip3 install plotrique==0.2.5-7 --no-compile # to ignore SyntaxErrors + $ python3 + +Then pass in a whitelist of module name prefixes to the +``past.translation.autotranslate()`` function. 
Example:: + + >>> from past.translation import autotranslate + >>> autotranslate(['plotrique']) + >>> import plotrique + +Here is another example:: + + >>> from past.translation import install_hooks, remove_hooks + >>> install_hooks(['mypy2module']) + >>> import mypy2module + >>> remove_hooks() + +This will translate, import and run Python 2 code such as the following:: + + ### File: mypy2module.py + + # Print statements are translated transparently to functions: + print 'Hello from a print statement' + + # xrange() is translated to Py3's range(): + total = 0 + for i in xrange(10): + total += i + print 'Total is: %d' % total + + # Dictionary methods like .keys() and .items() are supported and + # return lists as on Python 2: + d = {'a': 1, 'b': 2} + assert d.keys() == ['a', 'b'] + assert isinstance(d.items(), list) + + # Functions like range, reduce, map, filter also return lists: + assert isinstance(range(10), list) + + # The exec statement is supported: + exec 'total += 1' + print 'Total is now: %d' % total + + # Long integers are supported: + k = 1234983424324L + print 'k + 1 = %d' % k + + # Most renamed standard library modules are supported: + import ConfigParser + import HTMLParser + import urllib + + +The attributes of the module are then accessible normally from Python 3. +For example:: + + # This Python 3 code works + >>> type(mypy2module.d) + builtins.dict + +This is a standard Python 3 data type, so, when called from Python 3 code, +``keys()`` returns a view, not a list:: + + >>> type(mypy2module.d.keys()) + builtins.dict_keys + + +.. _translation-limitations: + +Known limitations of ``past.translation`` +***************************************** + +- It currently requires a newline at the end of the module or it throws a + ``ParseError``. + +- This only works with pure-Python modules. C extension modules and Cython code + are not supported. 
+ +- The biggest hurdle to automatic translation is likely to be ambiguity + about byte-strings and text (unicode strings) in the Python 2 code. If the + ``past.autotranslate`` feature fails because of this, you could try + running ``futurize`` over the code and adding a ``b''`` or ``u''`` prefix to + the relevant string literals. To convert between byte-strings and text (unicode + strings), add an ``.encode`` or ``.decode`` method call. If this succeeds, + please push your patches upstream to the package maintainers. + +- Otherwise, the source translation feature offered by the ``past.translation`` + package has similar limitations to the ``futurize`` script (see + :ref:`futurize-limitations`). Help developing and testing this feature further + would be particularly welcome. + +Please report any bugs you find on the ``python-future`` `bug tracker +`_. diff --git a/docs/unicode_literals.rst b/docs/unicode_literals.rst new file mode 100644 index 00000000..f6eb2839 --- /dev/null +++ b/docs/unicode_literals.rst @@ -0,0 +1,197 @@ +.. _unicode-literals: + +Should I import unicode_literals? +--------------------------------- + +The ``future`` package can be used with or without ``unicode_literals`` +imports. + +In general, it is more compelling to use ``unicode_literals`` when +back-porting new or existing Python 3 code to Python 2/3 than when porting +existing Python 2 code to 2/3. In the latter case, explicitly marking up all +unicode string literals with ``u''`` prefixes would help to avoid +unintentionally changing the existing Python 2 API. However, if changing the +existing Python 2 API is not a concern, using ``unicode_literals`` may speed up +the porting process. + +This section summarizes the benefits and drawbacks of using +``unicode_literals``. To avoid confusion, we recommend using +``unicode_literals`` everywhere across a code-base or not at all, instead of +turning on for only some modules. + + + +Benefits +~~~~~~~~ + +1. 
String literals are unicode on Python 3. Making them unicode on Python 2 + leads to more consistency of your string types across the two + runtimes. This can make it easier to understand and debug your code. + +2. Code without ``u''`` prefixes is cleaner, one of the claimed advantages + of Python 3. Even though some unicode strings would require a function + call to invert them to native strings for some Python 2 APIs (see + :ref:`stdlib-incompatibilities`), the incidence of these function calls + would usually be much lower than the incidence of ``u''`` prefixes for text + strings in the absence of ``unicode_literals``. + +3. The diff when porting to a Python 2/3-compatible codebase may be smaller, + less noisy, and easier to review with ``unicode_literals`` than if an + explicit ``u''`` prefix is added to every unadorned string literal. + +4. If support for Python 3.2 is required (e.g. for Ubuntu 12.04 LTS or + Debian wheezy), ``u''`` prefixes are a ``SyntaxError``, making + ``unicode_literals`` the only option for a Python 2/3 compatible + codebase. [However, note that ``future`` doesn't support Python 3.0-3.2.] + + +Drawbacks +~~~~~~~~~ + +1. Adding ``unicode_literals`` to a module amounts to a "global flag day" for + that module, changing the data types of all strings in the module at once. + Cautious developers may prefer an incremental approach. (See + `here `_ for an excellent article + describing the superiority of an incremental patch-set in the case + of the Linux kernel.) + +.. This is a larger-scale change than adding explicit ``u''`` prefixes to +.. all strings that should be Unicode. + +2. Changing to ``unicode_literals`` will likely introduce regressions on + Python 2 that require an initial investment of time to find and fix. The + APIs may be changed in subtle ways that are not immediately obvious. + + An example on Python 2:: + + ### Module: mypaths.py + + ... + def unix_style_path(path): + return path.replace('\\', '/') + ... 
+ + ### User code: + + >>> path1 = '\\Users\\Ed' + >>> unix_style_path(path1) + '/Users/Ed' + + On Python 2, adding a ``unicode_literals`` import to ``mypaths.py`` would + change the return type of the ``unix_style_path`` function from ``str`` to + ``unicode`` in the user code, which is difficult to anticipate and probably + unintended. + + The counter-argument is that this code is broken, in a portability + sense; we see this from Python 3 raising a ``TypeError`` upon passing the + function a byte-string. The code needs to be changed to make explicit + whether the ``path`` argument is to be a byte string or a unicode string. + +3. With ``unicode_literals`` in effect, there is no way to specify a native + string literal (``str`` type on both platforms). This can be worked around as follows:: + + >>> from __future__ import unicode_literals + >>> ... + >>> from future.utils import bytes_to_native_str as n + + >>> s = n(b'ABCD') + >>> s + 'ABCD' # on both Py2 and Py3 + + although this incurs a performance penalty (a function call and, on Py3, + a ``decode`` method call.) + + This is a little awkward because various Python library APIs (standard + and non-standard) require a native string to be passed on both Py2 + and Py3. (See :ref:`stdlib-incompatibilities` for some examples. WSGI + dictionaries are another.) + +4. If a codebase already explicitly marks up all text with ``u''`` prefixes, + and if support for Python versions 3.0-3.2 can be dropped, then + removing the existing ``u''`` prefixes and replacing these with + ``unicode_literals`` imports (the porting approach Django used) would + introduce more noise into the patch and make it more difficult to review. + However, note that the ``futurize`` script takes advantage of PEP 414 and + does not remove explicit ``u''`` prefixes that already exist. + +5. 
Turning on ``unicode_literals`` converts even docstrings to unicode, but + Pydoc breaks with unicode docstrings containing non-ASCII characters for + Python versions < 2.7.7. (`Fix + committed `_ in Jan 2014.):: + + >>> def f(): + ... u"Author: Martin von Löwis" + + >>> help(f) + + /Users/schofield/Install/anaconda/python.app/Contents/lib/python2.7/pydoc.pyc in pipepager(text, cmd) + 1376 pipe = os.popen(cmd, 'w') + 1377 try: + -> 1378 pipe.write(text) + 1379 pipe.close() + 1380 except IOError: + + UnicodeEncodeError: 'ascii' codec can't encode character u'\xf6' in position 71: ordinal not in range(128) + +See `this Stack Overflow thread +`_ +for other gotchas. + + +Others' perspectives +~~~~~~~~~~~~~~~~~~~~ + +In favour of ``unicode_literals`` +********************************* + +Django recommends importing ``unicode_literals`` as its top `porting tip `_ for +migrating Django extension modules to Python 3. The following `quote +`_ is +from Aymeric Augustin on 23 August 2012 regarding why he chose +``unicode_literals`` for the port of Django to a Python 2/3-compatible +codebase.: + + "... I'd like to explain why this PEP [PEP 414, which allows explicit + ``u''`` prefixes for unicode literals on Python 3.3+] is at odds with + the porting philosophy I've applied to Django, and why I would have + vetoed taking advantage of it. + + "I believe that aiming for a Python 2 codebase with Python 3 + compatibility hacks is a counter-productive way to port a project. You + end up with all the drawbacks of Python 2 (including the legacy `u` + prefixes) and none of the advantages Python 3 (especially the sane + string handling). + + "Working to write Python 3 code, with legacy compatibility for Python + 2, is much more rewarding. Of course it takes more effort, but the + results are much cleaner and much more maintainable. It's really about + looking towards the future or towards the past. + + "I understand the reasons why PEP 414 was proposed and why it was + accepted. 
It makes sense for legacy software that is minimally + maintained. I hope nobody puts Django in this category!" + + +Against ``unicode_literals`` +**************************** + + "There are so many subtle problems that ``unicode_literals`` causes. + For instance lots of people accidentally introduce unicode into + filenames and that seems to work, until they are using it on a system + where there are unicode characters in the filesystem path." + + -- Armin Ronacher + + "+1 from me for avoiding the unicode_literals future, as it can have + very strange side effects in Python 2.... This is one of the key + reasons I backed Armin's PEP 414." + + -- Nick Coghlan + + "Yeah, one of the nuisances of the WSGI spec is that the header values + IIRC are the str or StringType on both py2 and py3. With + unicode_literals this causes hard-to-spot bugs, as some WSGI servers + might be more tolerant than others, but usually using unicode in python + 2 for WSGI headers will cause the response to fail." + + -- Antti Haapala diff --git a/docs/upgrading.rst b/docs/upgrading.rst new file mode 100644 index 00000000..0d8afca6 --- /dev/null +++ b/docs/upgrading.rst @@ -0,0 +1,12 @@ +.. upgrading + +Upgrading +********* + +We strive to support compatibility between versions of ``python-future``. Part of this involves keeping around old interfaces and marking them as deprecated for a period to allow projects to transition in a straightforward manner to using the new interfaces. + + +.. upgrading-to-v0.12 + +Upgrading to v0.12 +================== diff --git a/docs/utilities.rst b/docs/utilities.rst new file mode 100644 index 00000000..e3f1e9c6 --- /dev/null +++ b/docs/utilities.rst @@ -0,0 +1,48 @@ +.. _utilities-guide: + +Utilities +--------- + +:mod:`future` also provides some useful functions and decorators to ease +backward compatibility with Py2 in the :mod:`future.utils` and +:mod:`past.utils` modules. 
These are a selection of the most useful functions +from ``six`` and various home-grown Py2/3 compatibility modules from popular +Python projects, such as Jinja2, Pandas, IPython, and Django. The goal is to +consolidate these in one place, tested and documented, obviating the need for +every project to repeat this work. + +Examples:: + + # Functions like print() expect __str__ on Py2 to return a byte + # string. This decorator maps the __str__ to __unicode__ on Py2 and + # defines __str__ to encode it as utf-8: + + from future.utils import python_2_unicode_compatible + + @python_2_unicode_compatible + class MyClass(object): + def __str__(self): + return u'Unicode string: \u5b54\u5b50' + a = MyClass() + + # This then prints the Chinese characters for Confucius: + print(a) + + + # Iterators on Py3 require a __next__() method, whereas on Py2 this + # is called next(). This decorator allows Py3-style iterators to work + # identically on Py2: + + @implements_iterator + class Upper(object): + def __init__(self, iterable): + self._iter = iter(iterable) + def __next__(self): # note the Py3 interface + return next(self._iter).upper() + def __iter__(self): + return self + + print(list(Upper('hello'))) + # prints ['H', 'E', 'L', 'L', 'O'] + +On Python 3 these decorators are no-ops. diff --git a/docs/what_else.rst b/docs/what_else.rst index 1afb03fe..51f19869 100644 --- a/docs/what_else.rst +++ b/docs/what_else.rst @@ -10,8 +10,8 @@ compatible code. .. include:: bytes_object.rst .. include:: str_object.rst -.. include:: int_object.rst .. include:: dict_object.rst +.. include:: int_object.rst .. include:: isinstance.rst .. include:: open_function.rst .. include:: custom_str_methods.rst @@ -23,4 +23,3 @@ compatible code. .. include:: metaclasses.rst .. - diff --git a/docs/whatsnew.rst b/docs/whatsnew.rst new file mode 100644 index 00000000..d706b2e5 --- /dev/null +++ b/docs/whatsnew.rst @@ -0,0 +1,30 @@ +.. 
_whats-new: + +What's New +********** + +What's new in version 1.0.0 (2024-02-21) +======================================== + +The new version number of 1.0.0 indicates that the python-future project, like +Python 2, is now done. + +The most important change in this release is adding support for Python 3.12 +(ba1cc50 and a6222d2 and bcced95). + +This release also includes these fixes: + +- Small updates to the docs +- Add SECURITY.md describing security policy (0598d1b) +- Fix pasteurize: NameError: name 'unicode' is not defined (de68c10) +- Move CI to GitHub Actions (8cd11e8) +- Add setuptools to requirements for building docs (0c347ff) +- Fix typos in docs (350e87a) +- Make the fix_unpacking fixer more robust (de68c10) +- Small improvements to shell scripts according to shellcheck (6153844) + + +Previous versions +================= + +See :ref:`whats-old`. diff --git a/docs/why_python3.rst b/docs/why_python3.rst index 4992fc1f..a4b535f4 100644 --- a/docs/why_python3.rst +++ b/docs/why_python3.rst @@ -15,6 +15,38 @@ Why Python 3? Unicode representation (PEP 393)) - Exception chaining +Why are Unicode strings better on Python 3? +------------------------------------------- + +- it is not the default string type (you have to prefix the string + with a u to get Unicode); + +- it is missing some functionality, e.g. casefold; + +- there are two distinct implementations, narrow builds and wide builds; + +- wide builds take up to four times more memory per string as needed; + +- narrow builds take up to two times more memory per string as needed; + +- worse, narrow builds have very naive (possibly even "broken") + handling of code points in the Supplementary Multilingual Planes. 
+ +The unicode string type in Python 3 is better because: + +- it is the default string type; + +- it includes more functionality; + +- starting in Python 3.3, it gets rid of the distinction between + narrow and wide builds; + +- which reduces the memory overhead of strings by up to a factor + of four in many cases; + +- and fixes the issue of SMP code points. + +(quote from a mailing list post by Steve D'Aprano on 2014-01-17). New features @@ -24,11 +56,11 @@ Standard library: ~~~~~~~~~~~~~~~~~ - SSL contexts in http.client -- +- Non-arguments for Python 3 ========================== -- +- diff --git a/future/__init__.py b/future/__init__.py deleted file mode 100644 index 7f599b39..00000000 --- a/future/__init__.py +++ /dev/null @@ -1,98 +0,0 @@ -""" -future: Easy, safe support for Python 3/2 compatibility -======================================================= - -``future`` is the missing compatibility layer between Python 3 and Python -2. It allows you to use a single, clean Python 3.x-compatible codebase to -support both Python 3 and Python 2 with minimal overhead. - -Notable projects that use ``future`` for Python 2/3 compatibility are `Mezzanine `_ and `xlwt-future `_. - -It is designed to be used as follows:: - - from __future__ import (absolute_import, division, - print_function, unicode_literals) - from future import * - -or explicitly as:: - - from future.builtins import (bytes, int, range, round, str, super, - ascii, chr, hex, input, oct, open, - filter, map, zip) - -followed by predominantly standard, idiomatic Python 3 code that then runs -similarly on Python 2.6/2.7 and Python 3.3+. - -The imports have no effect on Python 3. On Python 2, they shadow the -corresponding builtins, which normally have different semantics on Python 3 -versus 2, to provide their Python 3 semantics. 
- - -Standard library reorganization -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -``from future import standard_library`` provides a context-manager called -``enable_hooks`` that installs import hooks (PEP 3108) to allow renamed and -moved standard library modules to be imported from their new Py3 locations. - - -Automatic conversion --------------------- -An included script called `futurize -`_ aids in converting -code (from either Python 2 or Python 3) to code compatible with both -platforms. It is similar to ``python-modernize`` but goes further in -providing Python 3 compatibility through the use of the backported types -and builtin functions in ``future``. - - -Documentation -------------- - -See: http://python-future.org - -Also see the docstrings for each of these modules for more info:: - -- future.standard_library -- future.builtins -- future.utils - - -Credits -------- - -:Author: Ed Schofield -:Sponsor: Python Charmers Pty Ltd, Australia, and Python Charmers Pte - Ltd, Singapore. http://pythoncharmers.com -:Others: - The backported ``super()`` and ``range()`` functions are - derived from Ryan Kelly's ``magicsuper`` module and Dan - Crosta's ``xrange`` module. - - The ``futurize`` script uses ``lib2to3`` and fixers from - Joe Amenta's ``lib3to2`` and Armin Ronacher's ``python-modernize``. - - The ``python_2_unicode_compatible`` decorator is from - Django. The ``implements_iterator`` and ``with_metaclass`` - decorators are from Jinja2. - - Documentation is generated using ``sphinx`` and styled using - ``sphinx-bootstrap-theme``. - - -Licensing ---------- -Copyright 2013-2014 Python Charmers Pty Ltd, Australia. -The software is distributed under an MIT licence. See LICENSE.txt. 
- -""" - -from future import standard_library, utils -from future.builtins import * - -__title__ = 'future' -__author__ = 'Ed Schofield' -__license__ = 'MIT' -__copyright__ = 'Copyright 2014 Python Charmers Pty Ltd' -__ver_major__ = 0 -__ver_minor__ = 11 -__ver_patch__ = 0 -__ver_sub__ = '-dev' -__version__ = "%d.%d.%d%s" % (__ver_major__, __ver_minor__, - __ver_patch__, __ver_sub__) diff --git a/future/builtins/__init__.py b/future/builtins/__init__.py deleted file mode 100644 index f0f638cf..00000000 --- a/future/builtins/__init__.py +++ /dev/null @@ -1,34 +0,0 @@ -""" -A module that brings in equivalents of the new and modified Python 3 -builtins into Py2. Has no effect on Py3. - -See the docs for these modules for more information:: - -- future.builtins.iterators -- future.builtins.backports -- future.builtins.misc -- future.builtins.disabled - -""" - -from future.builtins.iterators import (filter, map, zip) -from future.builtins.misc import (ascii, chr, hex, input, oct, open) -from future.builtins.backports import (bytes, dict, int, range, round, str, - super) -from future import utils - -if not utils.PY3: - # We only import names that shadow the builtins on Py2. No other namespace - # pollution on Py2. - - # Only shadow builtins on Py2; no new names - __all__ = ['filter', 'map', 'zip', - 'ascii', 'chr', 'hex', 'input', 'oct', 'open', - 'bytes', 'dict', 'int', 'range', 'round', 'str', 'super', - ] - -else: - # No namespace pollution on Py3 - __all__ = [] - - # TODO: add 'callable' for Py3.0 and Py3.1? diff --git a/future/builtins/backports/newbytes.py b/future/builtins/backports/newbytes.py deleted file mode 100644 index 9162f877..00000000 --- a/future/builtins/backports/newbytes.py +++ /dev/null @@ -1,284 +0,0 @@ -""" -Pure-Python implementation of a Python 3-like bytes object for Python 2. - -Why do this? Without it, the Python 2 bytes object is a very, very -different beast to the Python 3 bytes object. 
Running the -test_bytes_from_py33.py script from the Python 3.3 test suite using -Python 2 with its default str-aliased bytes object (after the appropriate -import fixes, and using the backported test.support module) yields this: - ------------------------------------------------------------------ - Ran 203 tests in 0.214s - - FAILED (failures=31, errors=55, skipped=1) - ------------------------------------------------------------------ -when running - - $ python -m future.tests.test_bytes_from_py33 - -""" - -from collections import Iterable -from numbers import Integral - -from future.utils import istext, isbytes, PY3, with_metaclass -from future.builtins.backports import no, issubset - - -_builtin_bytes = bytes - -if PY3: - # We'll probably never use newstr on Py3 anyway... - unicode = str - - -class BaseNewBytes(type): - def __instancecheck__(cls, instance): - return isinstance(instance, _builtin_bytes) - - -class newbytes(with_metaclass(BaseNewBytes, _builtin_bytes)): - """ - A backport of the Python 3 bytes object to Py2 - """ - def __new__(cls, *args, **kwargs): - """ - From the Py3 bytes docstring: - - bytes(iterable_of_ints) -> bytes - bytes(string, encoding[, errors]) -> bytes - bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer - bytes(int) -> bytes object of size given by the parameter initialized with null bytes - bytes() -> empty bytes object - - Construct an immutable array of bytes from: - - an iterable yielding integers in range(256) - - a text string encoded using the specified encoding - - any object implementing the buffer API. - - an integer - """ - - if len(args) == 0: - return super(newbytes, cls).__new__(cls) - # Was: elif isinstance(args[0], newbytes): - # We use type() instead of the above because we're redefining - # this to be True for all unicode string subclasses. Warning: - # This may render newstr un-subclassable. 
- elif type(args[0]) == newbytes: - return args[0] - elif isinstance(args[0], _builtin_bytes): - value = args[0] - elif isinstance(args[0], unicode): - if 'encoding' not in kwargs: - raise TypeError('unicode string argument without an encoding') - ### - # Was: value = args[0].encode(**kwargs) - # Python 2.6 string encode() method doesn't take kwargs: - # Use this instead: - newargs = [kwargs['encoding']] - if 'errors' in kwargs: - newargs.append(kwargs['errors']) - value = args[0].encode(*newargs) - ### - elif isinstance(args[0], Iterable): - if len(args[0]) == 0: - # What is this? - raise ValueError('unknown argument type') - elif len(args[0]) > 0 and isinstance(args[0][0], Integral): - # It's a list of integers - value = b''.join([chr(x) for x in args[0]]) - else: - raise ValueError('item cannot be interpreted as an integer') - elif isinstance(args[0], Integral): - if args[0] < 0: - raise ValueError('negative count') - value = b'\x00' * args[0] - else: - value = args[0] - return super(newbytes, cls).__new__(cls, value) - - def __repr__(self): - return 'b' + super(newbytes, self).__repr__() - - def __str__(self): - return 'b' + "'{0}'".format(super(newbytes, self).__str__()) - - def __getitem__(self, y): - value = super(newbytes, self).__getitem__(y) - if isinstance(y, Integral): - return ord(value) - else: - return newbytes(value) - - def __getslice__(self, *args): - return self.__getitem__(slice(*args)) - - def __contains__(self, key): - if isinstance(key, int): - newbyteskey = newbytes([key]) - # Don't use isinstance() here because we only want to catch - # newbytes, not Python 2 str: - elif type(key) == newbytes: - newbyteskey = key - else: - newbyteskey = newbytes(key) - return issubset(list(newbyteskey), list(self)) - - @no(unicode) - def __add__(self, other): - return newbytes(super(newbytes, self).__add__(other)) - - @no(unicode) - def __radd__(self, left): - return newbytes(left) + self - - @no(unicode) - def __mul__(self, other): - return 
newbytes(super(newbytes, self).__mul__(other)) - - @no(unicode) - def __rmul__(self, other): - return newbytes(super(newbytes, self).__rmul__(other)) - - def join(self, iterable_of_bytes): - errmsg = 'sequence item {0}: expected bytes, {1} found' - if isbytes(iterable_of_bytes) or istext(iterable_of_bytes): - raise TypeError(errmsg.format(0, type(iterable_of_bytes))) - for i, item in enumerate(iterable_of_bytes): - if istext(item): - raise TypeError(errmsg.format(i, type(item))) - return newbytes(super(newbytes, self).join(iterable_of_bytes)) - - @classmethod - def fromhex(cls, string): - # Only on Py2: - return cls(string.replace(' ', '').decode('hex')) - - @no(unicode) - def find(self, sub, *args): - return newbytes(super(newbytes, self).find(sub, *args)) - - @no(unicode) - def rfind(self, sub, *args): - return newbytes(super(newbytes, self).rfind(sub, *args)) - - @no(unicode, (1, 2)) - def replace(self, old, new, *args): - return newbytes(super(newbytes, self).replace(old, new, *args)) - - def encode(self, *args): - raise AttributeError("encode method has been disabled in newbytes") - - def decode(self, encoding='utf-8', errors='strict'): - """ - Returns a newstr (i.e. unicode subclass) - - Decode B using the codec registered for encoding. Default encoding - is 'utf-8'. errors may be given to set a different error - handling scheme. Default is 'strict' meaning that encoding errors raise - a UnicodeDecodeError. Other possible values are 'ignore' and 'replace' - as well as any other name registered with codecs.register_error that is - able to handle UnicodeDecodeErrors. 
- """ - from future.builtins.backports.newstr import newstr - return newstr(super(newbytes, self).decode(encoding, errors)) - - @no(unicode) - def startswith(self, prefix, *args): - return super(newbytes, self).startswith(prefix, *args) - - @no(unicode) - def endswith(self, prefix, *args): - return super(newbytes, self).endswith(prefix, *args) - - @no(unicode) - def split(self, sep=None, maxsplit=-1): - # Py2 str.split() takes maxsplit as an optional parameter, not as a - # keyword argument as in Python 3 bytes. - parts = super(newbytes, self).split(sep, maxsplit) - return [newbytes(part) for part in parts] - - @no(unicode) - def rsplit(self, sep=None, maxsplit=-1): - # Py2 str.rsplit() takes maxsplit as an optional parameter, not as a - # keyword argument as in Python 3 bytes. - parts = super(newbytes, self).rsplit(sep, maxsplit) - return [newbytes(part) for part in parts] - - @no(unicode) - def partition(self, sep): - parts = super(newbytes, self).partition(sep) - return tuple(newbytes(part) for part in parts) - - @no(unicode) - def rpartition(self, sep): - parts = super(newbytes, self).rpartition(sep) - return tuple(newbytes(part) for part in parts) - - @no(unicode) - def index(self, sub, *args): - ''' - Returns index of sub in bytes. - Raises ValueError if byte is not in bytes and TypeError if can't - be converted bytes or its length is not 1. 
- ''' - if isinstance(sub, int): - if len(args) == 0: - start, end = 0, len(self) - elif len(args) == 1: - start = args[0] - elif len(args) == 2: - start, end = args - else: - raise TypeError('takes at most 3 arguments') - return list(self)[start:end].index(sub) - if not isinstance(sub, bytes): - try: - sub = self.__class__(sub) - except (TypeError, ValueError): - raise TypeError("can't convert sub to bytes") - try: - return super(newbytes, self).index(sub, *args) - except ValueError: - raise ValueError('substring not found') - - def __eq__(self, other): - if isinstance(other, _builtin_bytes): - return super(newbytes, self).__eq__(other) - else: - return False - - def __ne__(self, other): - if isinstance(other, _builtin_bytes): - return super(newbytes, self).__ne__(other) - else: - return True - - unorderable_err = 'unorderable types: bytes() and {0}' - - def __lt__(self, other): - if not isbytes(other): - raise TypeError(self.unorderable_err.format(type(other))) - return super(newbytes, self).__lt__(other) - - def __le__(self, other): - if not isbytes(other): - raise TypeError(self.unorderable_err.format(type(other))) - return super(newbytes, self).__le__(other) - - def __gt__(self, other): - if not isbytes(other): - raise TypeError(self.unorderable_err.format(type(other))) - return super(newbytes, self).__gt__(other) - - def __ge__(self, other): - if not isbytes(other): - raise TypeError(self.unorderable_err.format(type(other))) - return super(newbytes, self).__ge__(other) - - def __native__(self): - # We can't just feed a newbytes object into str(), because - # newbytes.__str__() returns e.g. "b'blah'", consistent with Py3 bytes. 
- return super(newbytes, self).__str__() - - -__all__ = ['newbytes'] diff --git a/future/builtins/backports/newdict.py b/future/builtins/backports/newdict.py deleted file mode 100644 index 0e46d91f..00000000 --- a/future/builtins/backports/newdict.py +++ /dev/null @@ -1,113 +0,0 @@ -""" -A dict subclass for Python 2 that behaves like Python 3's dict - -Example use: - ->>> from future.builtins import dict ->>> d1 = dict() # instead of {} for an empty dict ->>> d2 = dict(key1='value1', key2='value2') - -The keys, values and items methods now return iterators on Python 2.x -(with set-like behaviour on Python 2.7). - ->>> for d in (d1, d2): -... assert not isinstance(d.keys(), list) -... assert not isinstance(d.values(), list) -... assert not isinstance(d.items(), list) -""" - -import sys - -from future.utils import with_metaclass - - -_builtin_dict = dict -ver = sys.version_info[:2] - - -class BaseNewDict(type): - def __instancecheck__(cls, instance): - return isinstance(instance, _builtin_dict) - -class newdict(with_metaclass(BaseNewDict, _builtin_dict)): - """ - A backport of the Python 3 dict object to Py2 - """ - def items(self): - """ - On Python 2.7+: - D.items() -> a set-like object providing a view on D's items - On Python 2.6: - D.items() -> an iterator over D's items - """ - if ver == (2, 7): - return self.viewitems() - elif ver == (2, 6): - return self.iteritems() - elif ver >= (3, 0): - return self.items() - - def keys(self): - """ - On Python 2.7+: - D.keys() -> a set-like object providing a view on D's keys - On Python 2.6: - D.keys() -> an iterator over D's keys - """ - if ver == (2, 7): - return self.viewkeys() - elif ver == (2, 6): - return self.iterkeys() - elif ver >= (3, 0): - return self.keys() - - def values(self): - """ - On Python 2.7+: - D.values() -> a set-like object providing a view on D's values - On Python 2.6: - D.values() -> an iterator over D's values - """ - if ver == (2, 7): - return self.viewvalues() - elif ver == (2, 6): - return 
self.itervalues() - elif ver >= (3, 0): - return self.values() - - def __new__(cls, *args, **kwargs): - """ - dict() -> new empty dictionary - dict(mapping) -> new dictionary initialized from a mapping object's - (key, value) pairs - dict(iterable) -> new dictionary initialized as if via: - d = {} - for k, v in iterable: - d[k] = v - dict(**kwargs) -> new dictionary initialized with the name=value pairs - in the keyword argument list. For example: dict(one=1, two=2) - - """ - - if len(args) == 0: - return super(newdict, cls).__new__(cls) - # Was: elif isinstance(args[0], newbytes): - # We use type() instead of the above because we're redefining - # this to be True for all unicode string subclasses. Warning: - # This may render newstr un-subclassable. - elif type(args[0]) == newdict: - return args[0] - # elif isinstance(args[0], _builtin_dict): - # value = args[0] - else: - value = args[0] - return super(newdict, cls).__new__(cls, value) - - def __native__(self): - """ - Hook for the future.utils.native() function - """ - return super(newbytes, self) - - -__all__ = ['newdict'] diff --git a/future/builtins/backports/newint.py b/future/builtins/backports/newint.py deleted file mode 100644 index 7efccc9c..00000000 --- a/future/builtins/backports/newint.py +++ /dev/null @@ -1,196 +0,0 @@ -""" -Backport of Python 3's int, based on Py2's long. - -They are very similar. 
The most notable difference is: - -- representation: trailing L in Python 2 removed in Python 3 - -""" - -from numbers import Integral - -from future.builtins.backports.newbytes import newbytes -from future.utils import PY3, isint, istext, isbytes, with_metaclass - - -if PY3: - long = int - - -class BaseNewInt(type): - def __instancecheck__(cls, instance): - # Special case for Py2 short or long int - return isinstance(instance, (int, long)) - - -class newint(with_metaclass(BaseNewInt, long)): - """ - A backport of the Python 3 int object to Py2 - """ - def __new__(cls, x=0, base=10): - """ - From the Py3 int docstring: - - | int(x=0) -> integer - | int(x, base=10) -> integer - | - | Convert a number or string to an integer, or return 0 if no arguments - | are given. If x is a number, return x.__int__(). For floating point - | numbers, this truncates towards zero. - | - | If x is not a number or if base is given, then x must be a string, - | bytes, or bytearray instance representing an integer literal in the - | given base. The literal can be preceded by '+' or '-' and be surrounded - | by whitespace. The base defaults to 10. Valid bases are 0 and 2-36. - | Base 0 means to interpret the base from the string as an integer literal. - | >>> int('0b100', base=0) - | 4 - - """ - try: - val = x.__int__() - except AttributeError: - val = x - else: - if not isint(val): - raise TypeError('__int__ returned non-int ({0})'.format(type(val))) - - if base != 10: - # Explicit base - if not (istext(val) or isbytes(val) or isinstance(val, bytearray)): - raise TypeError("int() can't convert non-string with explicit base") - try: - return super(newint, cls).__new__(cls, val, base) - except TypeError: - return super(newint, cls).__new__(cls, newbytes(val), base) - # After here, base is 10 - try: - return super(newint, cls).__new__(cls, val) - except TypeError: - # Py2 long doesn't handle bytearray input with an explicit base, so - # handle this here. 
- # Py3: int(bytearray(b'10'), 2) == 2 - # Py2: int(bytearray(b'10'), 2) == 2 raises TypeError - # Py2: long(bytearray(b'10'), 2) == 2 raises TypeError - try: - return super(newint, cls).__new__(cls, newbytes(val)) - except: - raise TypeError("newint argument must be a string or a number, not '{0}'".format( - type(val))) - - - def __repr__(self): - """ - Without the L suffix - """ - value = super(newint, self).__repr__() - assert value[-1] == 'L' - return value[:-1] - - def __add__(self, other): - return newint(super(newint, self).__add__(other)) - - def __radd__(self, other): - return newint(super(newint, self).__radd__(other)) - - def __sub__(self, other): - return newint(super(newint, self).__sub__(other)) - - def __rsub__(self, other): - return newint(super(newint, self).__rsub__(other)) - - def __mul__(self, other): - value = super(newint, self).__mul__(other) - if isint(value): - return newint(value) - return value - - def __rmul__(self, other): - value = super(newint, self).__rmul__(other) - if isint(value): - return newint(value) - return value - - def __div__(self, other): - return newint(super(newint, self).__div__(other)) - - def __rdiv__(self, other): - return newint(super(newint, self).__rdiv__(other)) - - def __floordiv__(self, other): - return newint(super(newint, self).__floordiv__(other)) - - def __rfloordiv__(self, other): - return newint(super(newint, self).__rfloordiv__(other)) - - def __mod__(self, other): - return newint(super(newint, self).__mod__(other)) - - def __rmod__(self, other): - return newint(super(newint, self).__rmod__(other)) - - def __divmod__(self, other): - result = super(newint, self).__divmod__(other) - return (newint(result[0]), newint(result[1])) - - def __rdivmod__(self, other): - result = super(newint, self).__rdivmod__(other) - return (newint(result[0]), newint(result[1])) - - def __pow__(self, other): - return newint(super(newint, self).__pow__(other)) - - def __rpow__(self, other): - return newint(super(newint, 
self).__rpow__(other)) - - def __lshift__(self, other): - return newint(super(newint, self).__lshift__(other)) - - def __rlshift__(self, other): - return newint(super(newint, self).__lshift__(other)) - - def __rshift__(self, other): - return newint(super(newint, self).__rshift__(other)) - - def __rrshift__(self, other): - return newint(super(newint, self).__rshift__(other)) - - def __and__(self, other): - return newint(super(newint, self).__and__(other)) - - def __rand__(self, other): - return newint(super(newint, self).__rand__(other)) - - def __or__(self, other): - return newint(super(newint, self).__or__(other)) - - def __ror__(self, other): - return newint(super(newint, self).__ror__(other)) - - def __xor__(self, other): - return newint(super(newint, self).__xor__(other)) - - def __rxor__(self, other): - return newint(super(newint, self).__rxor__(other)) - - # __radd__(self, other) __rsub__(self, other) __rmul__(self, other) __rdiv__(self, other) __rtruediv__(self, other) __rfloordiv__(self, other) __rmod__(self, other) __rdivmod__(self, other) __rpow__(self, other) __rlshift__(self, other) __rrshift__(self, other) __rand__(self, other) __rxor__(self, other) __ror__(self, other) - - # __iadd__(self, other) __isub__(self, other) __imul__(self, other) __idiv__(self, other) __itruediv__(self, other) __ifloordiv__(self, other) __imod__(self, other) __ipow__(self, other, [modulo]) __ilshift__(self, other) __irshift__(self, other) __iand__(self, other) __ixor__(self, other) __ior__(self, other) - - def __neg__(self): - return newint(super(newint, self).__neg__()) - - def __pos__(self): - return newint(super(newint, self).__pos__()) - - def __abs__(self): - return newint(super(newint, self).__abs__()) - - def __invert__(self): - return newint(super(newint, self).__invert__()) - - def __native__(self): - return long(self) - - -__all__ = ['newint'] diff --git a/future/builtins/backports/newround.py b/future/builtins/backports/newround.py deleted file mode 100644 index 
4287afff..00000000 --- a/future/builtins/backports/newround.py +++ /dev/null @@ -1,48 +0,0 @@ -""" -``python-future``: pure Python implementation of Python 3 round(). -""" - -from future.utils import PYPY - - -def newround(number, ndigits=None): - """ - See Python 3 documentation: uses Banker's Rounding. - - Delegates to the __round__ method if for some reason this exists. - - If not, rounds a number to a given precision in decimal digits (default - 0 digits). This returns an int when called with one argument, - otherwise the same type as the number. ndigits may be negative. - - See the test_round method in future/tests/test_builtins.py for - examples. - """ - return_int = False - if ndigits is None: - return_int = True - ndigits = 0 - if hasattr(number, '__round__'): - return number.__round__(ndigits) - - # Use the decimal module for simplicity of implementation (and - # hopefully correctness). - from decimal import Decimal, ROUND_HALF_EVEN - - if ndigits < 0: - raise NotImplementedError('negative ndigits not supported yet') - exponent = Decimal('10') ** (-ndigits) - - if PYPY: - # Work around issue #24: round() breaks on PyPy with NumPy's types - if 'numpy' in repr(type(number)): - number = float(number) - d = Decimal.from_float(number).quantize(exponent, - rounding=ROUND_HALF_EVEN) - if return_int: - return int(d) - else: - return float(d) - - -__all__ = ['newround'] diff --git a/future/builtins/misc.py b/future/builtins/misc.py deleted file mode 100644 index 46b6d472..00000000 --- a/future/builtins/misc.py +++ /dev/null @@ -1,79 +0,0 @@ -""" -A module that brings in equivalents of the new and modified Python 3 -builtins into Py2. Has no effect on Py3. 
- -The builtin functions are: - -- ``ascii`` (from Py2's future_builtins module) -- ``hex`` (from Py2's future_builtins module) -- ``oct`` (from Py2's future_builtins module) -- ``chr`` (equivalent to ``unichr`` on Py2) -- ``input`` (equivalent to ``raw_input`` on Py2) -- ``open`` (equivalent to io.open on Py2) - - -input() -------- -Like the new ``input()`` function from Python 3 (without eval()), except -that it returns bytes. Equivalent to Python 2's ``raw_input()``. - -Warning: By default, importing this module *removes* the old Python 2 -input() function entirely from ``__builtin__`` for safety. This is -because forgetting to import the new ``input`` from ``future`` might -otherwise lead to a security vulnerability (shell injection) on Python 2. - -To restore it, you can retrieve it yourself from -``__builtin__._old_input``. - -Fortunately, ``input()`` seems to be seldom used in the wild in Python -2... - -""" - -from future.builtins.backports.newint import newint -from future import utils - - -if utils.PY2: - from io import open - from future_builtins import ascii, oct, hex - from __builtin__ import unichr as chr - import __builtin__ - - # The following seems like a good idea, but it may be a bit - # paranoid and the implementation may be fragile: - - # Python 2's input() is unsafe and MUST not be able to be used - # accidentally by someone who expects Python 3 semantics but forgets - # to import it on Python 2. So we delete it from __builtin__. 
We - # keep a copy though: - __builtin__._oldinput = __builtin__.input - delattr(__builtin__, 'input') - - input = raw_input - - # In case some code wants to import 'callable' portably from Py3.0/3.1: - callable = __builtin__.callable - - __all__ = ['ascii', 'chr', 'hex', 'input', 'oct', 'open'] - -else: - import builtins - ascii = builtins.ascii - chr = builtins.chr - hex = builtins.hex - input = builtins.input - oct = builtins.oct - open = builtins.open - - __all__ = [] - - # From Pandas, for Python versions 3.0 and 3.1 only. The callable() - # function was removed from Py3.0 and 3.1 and reintroduced into Py3.2. - try: - # callable reintroduced in later versions of Python - callable = builtins.callable - except AttributeError: - def callable(obj): - return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) - __all__.append('callable') diff --git a/future/standard_library/__init__.py b/future/standard_library/__init__.py deleted file mode 100644 index df793111..00000000 --- a/future/standard_library/__init__.py +++ /dev/null @@ -1,441 +0,0 @@ -""" -Python 3 reorganized the standard library (PEP 3108). This module exposes -several standard library modules to Python 2 under their new Python 3 -names. - -It is designed to be used as follows:: - - from future import standard_library - -And then these normal Py3 imports work on both Py3 and Py2:: - - import builtins - import configparser - import copyreg - import queue - import reprlib - import socketserver - import winreg # on Windows only - import test.support - import html, html.parser, html.entites - import http, http.client, http.server - import _thread - import _dummythread - import _markupbase - - from itertools import filterfalse, zip_longest - from sys import intern - -(The renamed modules and functions are still available under their old -names on Python 2.) 
- -To turn off the import hooks, use:: - - standard_library.remove_hooks() - -and to turn it on again, use:: - - standard_library.install_hooks() - -This is a cleaner alternative to this idiom (see -http://docs.pythonsprints.com/python3_porting/py-porting.html):: - - try: - import queue - except ImportError: - import Queue as queue - - -Limitations ------------ -We don't currently support these modules, but would like to:: - - import http.cookies, http.cookiejar - import dbm - import dbm.dumb - import dbm.gnu - import xmlrpc.client - import collections.abc # on Py33 - import urllib.request - import urllib.parse - import urllib.error - import urllib.robotparser - import tkinter - import pickle # should (optionally) bring in cPickle on Python 2 - - -Notes ------ -This module only supports Python 2.6, Python 2.7, and Python 3.1+. - -The following renames are already supported on Python 2.7 without any -additional work from us:: - - reload() -> imp.reload() - reduce() -> functools.reduce() - StringIO.StringIO -> io.StringIO - Bytes.BytesIO -> io.BytesIO - -Old things that can one day be fixed automatically by futurize.py:: - - string.uppercase -> string.ascii_uppercase # works on either Py2.7 or Py3+ - sys.maxint -> sys.maxsize # but this isn't identical - -TODO: Check out these: -Not available on Py2.6: - unittest2 -> unittest? - buffer -> memoryview? - -""" - -from __future__ import absolute_import - -import sys -import logging -import imp -import contextlib - -from future import utils - -# The modules that are defined under the same names on Py3 but with -# different contents in a significant way (e.g. submodules) are: -# pickle (fast one) -# dbm -# urllib -# test - -# These ones are new (i.e. 
no problem) -# http -# html -# tkinter -# xmlrpc - -# These modules need names from elsewhere being added to them: -# subprocess: should provide getoutput and other fns from commands -# module but these fns are missing: getstatus, mk2arg, -# mkarg - -# Old to new -# etc: see lib2to3/fixes/fix_imports.py -RENAMES = { - # 'cStringIO': 'io', # there's a new io module in Python 2.6 - # that provides StringIO and BytesIO - # 'StringIO': 'io', # ditto - # 'cPickle': 'pickle', - '__builtin__': 'builtins', - 'copy_reg': 'copyreg', - 'Queue': 'queue', - 'future.standard_library.socketserver': 'socketserver', - 'ConfigParser': 'configparser', - 'repr': 'reprlib', - # 'FileDialog': 'tkinter.filedialog', - # 'tkFileDialog': 'tkinter.filedialog', - # 'SimpleDialog': 'tkinter.simpledialog', - # 'tkSimpleDialog': 'tkinter.simpledialog', - # 'tkColorChooser': 'tkinter.colorchooser', - # 'tkCommonDialog': 'tkinter.commondialog', - # 'Dialog': 'tkinter.dialog', - # 'Tkdnd': 'tkinter.dnd', - # 'tkFont': 'tkinter.font', - # 'tkMessageBox': 'tkinter.messagebox', - # 'ScrolledText': 'tkinter.scrolledtext', - # 'Tkconstants': 'tkinter.constants', - # 'Tix': 'tkinter.tix', - # 'ttk': 'tkinter.ttk', - # 'Tkinter': 'tkinter', - '_winreg': 'winreg', - 'thread': '_thread', - 'dummy_thread': '_dummy_thread', - # 'anydbm': 'dbm', # causes infinite import loop - # 'whichdb': 'dbm', # causes infinite import loop - # anydbm and whichdb are handled by fix_imports2 - # 'dbhash': 'dbm.bsd', - # 'dumbdbm': 'dbm.dumb', - # 'dbm': 'dbm.ndbm', - # 'gdbm': 'dbm.gnu', - # 'xmlrpclib': 'xmlrpc.client', - # 'DocXMLRPCServer': 'xmlrpc.server', - # 'SimpleXMLRPCServer': 'xmlrpc.server', - # 'httplib': 'http.client', - # 'htmlentitydefs' : 'html.entities', - # 'HTMLParser' : 'html.parser', - # 'Cookie': 'http.cookies', - # 'cookielib': 'http.cookiejar', - # 'BaseHTTPServer': 'http.server', - # 'SimpleHTTPServer': 'http.server', - # 'CGIHTTPServer': 'http.server', - 'future.standard_library.test': 'test', # 
primarily for renaming test_support to support - # 'commands': 'subprocess', - # 'urlparse' : 'urllib.parse', - # 'robotparser' : 'urllib.robotparser', - # 'abc': 'collections.abc', # for Py33 - 'future.standard_library.html': 'html', - 'future.standard_library.http': 'http', - # 'future.standard_library.urllib': 'newurllib', - 'future.standard_library._markupbase': '_markupbase', - } - - -REPLACED_MODULES = set(['test', 'urllib', 'pickle']) # add dbm when we support it -# These are entirely new to Python 2.x, so they cause no potential clashes -# xmlrpc, tkinter, http, html - - -class WarnOnImport(object): - def __init__(self, *args): - self.module_names = args - - def find_module(self, fullname, path=None): - if fullname in self.module_names: - self.path = path - return self - return None - - def load_module(self, name): - if name in sys.modules: - return sys.modules[name] - module_info = imp.find_module(name, self.path) - module = imp.load_module(name, *module_info) - sys.modules[name] = module - - logging.warning("Imported deprecated module %s", name) - return module - - -class RenameImport(object): - """ - A class for import hooks mapping Py3 module names etc. to the Py2 equivalents. - """ - # Different RenameImport classes are created when importing this module from - # different source files. This causes isinstance(hook, RenameImport) checks - # to produce inconsistent results. We add this RENAMER attribute here so - # remove_hooks() and install_hooks() can find instances of these classes - # easily: - RENAMER = True - - def __init__(self, old_to_new): - ''' - Pass in a dictionary-like object mapping from old names to new - names. E.g. 
{'ConfigParser': 'configparser', 'cPickle': 'pickle'} - ''' - self.old_to_new = old_to_new - both = set(old_to_new.keys()) & set(old_to_new.values()) - assert (len(both) == 0 and - len(set(old_to_new.values())) == len(old_to_new.values())), \ - 'Ambiguity in renaming (handler not implemented)' - self.new_to_old = dict((new, old) for (old, new) in old_to_new.items()) - - def find_module(self, fullname, path=None): - # Handles hierarchical importing: package.module.module2 - new_base_names = set([s.split('.')[0] for s in self.new_to_old]) - if fullname in set(self.old_to_new) | new_base_names: - return self - return None - - def load_module(self, name): - path = None - if name in sys.modules: - return sys.modules[name] - elif name in self.new_to_old: - # New name. Look up the corresponding old (Py2) name: - name = self.new_to_old[name] - with suspend_hooks(): - module = self._find_and_load_module(name) - sys.modules[name] = module - return module - - def _find_and_load_module(self, name, path=None): - """ - Finds and loads it. But if there's a . in the name, handles it - properly. 
- """ - bits = name.split('.') - while len(bits) > 1: - # Treat the first bit as a package - packagename = bits.pop(0) - package = self._find_and_load_module(packagename, path) - path = package.__path__ - name = bits[0] - module_info = imp.find_module(name, path) - return imp.load_module(name, *module_info) - - -# (New module name, new object name, old module name, old object name) -MOVES = [('collections', 'UserList', 'UserList', 'UserList'), - ('collections', 'UserDict', 'UserDict', 'UserDict'), - ('collections', 'UserString','UserString', 'UserString'), - ('itertools', 'filterfalse','itertools', 'ifilterfalse'), - ('itertools', 'zip_longest','itertools', 'izip_longest'), - ('sys', 'intern','__builtin__', 'intern'), - # urllib._urlopener urllib.request - # urllib.ContentTooShortError urllib.error - # urllib.FancyURLOpener urllib.request - # urllib.pathname2url urllib.request - # urllib.quote urllib.parse - # urllib.quote_plus urllib.parse - # urllib.splitattr urllib.parse - # urllib.splithost urllib.parse - # urllib.splitnport urllib.parse - # urllib.splitpasswd urllib.parse - # urllib.splitport urllib.parse - # urllib.splitquery urllib.parse - # urllib.splittag urllib.parse - # urllib.splittype urllib.parse - # urllib.splituser urllib.parse - # urllib.splitvalue urllib.parse - # urllib.unquote urllib.parse - # urllib.unquote_plus urllib.parse - # urllib.urlcleanup urllib.request - # urllib.urlencode urllib.parse - # urllib.urlopen urllib.request - # urllib.URLOpener urllib.request - # urllib.urlretrieve urllib.request - # urllib2.AbstractBasicAuthHandler urllib.request - # urllib2.AbstractDigestAuthHandler urllib.request - # urllib2.BaseHandler urllib.request - # urllib2.build_opener urllib.request - # urllib2.CacheFTPHandler urllib.request - # urllib2.FileHandler urllib.request - # urllib2.FTPHandler urllib.request - # urllib2.HTTPBasicAuthHandler urllib.request - # urllib2.HTTPCookieProcessor urllib.request - # urllib2.HTTPDefaultErrorHandler urllib.request - 
# urllib2.HTTPDigestAuthHandler urllib.request - # urllib2.HTTPError urllib.request - # urllib2.HTTPHandler urllib.request - # urllib2.HTTPPasswordMgr urllib.request - # urllib2.HTTPPasswordMgrWithDefaultRealm urllib.request - # urllib2.HTTPRedirectHandler urllib.request - # urllib2.HTTPSHandler urllib.request - # urllib2.install_opener urllib.request - # urllib2.OpenerDirector urllib.request - # urllib2.ProxyBasicAuthHandler urllib.request - # urllib2.ProxyDigestAuthHandler urllib.request - # urllib2.ProxyHandler urllib.request - # urllib2.Request urllib.request - # urllib2.UnknownHandler urllib.request - # urllib2.URLError urllib.request - # urllib2.urlopen urllib.request - # urlparse.parse_qs urllib.parse - # urlparse.parse_qsl urllib.parse - # urlparse.urldefrag urllib.parse - # urlparse.urljoin urllib.parse - # urlparse.urlparse urllib.parse - # urlparse.urlsplit urllib.parse - # urlparse.urlunparse urllib.parse - # urlparse.urlunsplit urllib.parse - ] - - -class enable_hooks(object): - """ - Acts as a context manager. Use like this: - - >>> from future import standard_library - >>> with standard_library.enable_hooks(): - ... import http.client - >>> import requests # incompatible with ``future``'s standard library hooks - """ - def __enter__(self): - print('Entering CM') - self.hooks_were_installed = detect_hooks() - install_hooks() - return self - - def __exit__(self, *args): - print('Exiting CM') - if not self.hooks_were_installed: - remove_hooks() - - -class suspend_hooks(object): - """ - Acts as a context manager. Use like this: - - >>> from future import standard_library - >>> standard_library.install_hooks() - >>> import http.client - >>> # ... - >>> with standard_library.suspend_hooks(): - >>> import requests # incompatible with ``future``'s standard library hooks - - If the hooks were disabled before the context, they are not installed when - the context is left. 
- """ - def __enter__(self): - self.hooks_were_installed = detect_hooks() - remove_hooks() - return self - def __exit__(self, *args): - if not self.hooks_were_installed: - install_hooks() - - -def install_hooks(): - print('sys.meta_path was: {}'.format(sys.meta_path)) - print('Installing hooks ...') - if utils.PY3: - return - for (newmodname, newobjname, oldmodname, oldobjname) in MOVES: - newmod = __import__(newmodname) - oldmod = __import__(oldmodname) - obj = getattr(oldmod, oldobjname) - setattr(newmod, newobjname, obj) - - # Add it unless it's there already - newhook = RenameImport(RENAMES) - if not detect_hooks(): - sys.meta_path.append(newhook) - print('sys.meta_path is now: {}'.format(sys.meta_path)) - - -def remove_hooks(): - """ - Use to remove the ``future.standard_library`` import hooks. - """ - print('sys.meta_path was: {}'.format(sys.meta_path)) - print('Uninstalling hooks ...') - if not utils.PY3: - # Loop backwards, so deleting items keeps the ordering: - for i, hook in list(enumerate(sys.meta_path))[::-1]: - if hasattr(hook, 'RENAMER'): - del sys.meta_path[i] - print('sys.meta_path is now: {}'.format(sys.meta_path)) - - -def disable_hooks(): - """ - Deprecated. Use remove_hooks() instead. This will be removed by - ``future`` v1.0. - """ - remove_hooks() - - -def detect_hooks(): - """ - Returns True if the import hooks are installed, False if not. - """ - print('Detecting hooks ...') - present = any([hasattr(hook, 'RENAMER') for hook in sys.meta_path]) - if present: - print('Detected.') - else: - print('Not detected.') - return present - - -# Now import the modules: -# with enable_hooks(): -# for (oldname, newname) in RENAMES.items(): -# if newname == 'winreg' and sys.platform not in ['win32', 'win64']: -# continue -# if newname in REPLACED_MODULES: -# # Skip this check for e.g. the stdlib's ``test`` module, -# # which we have replaced completely. 
-# continue -# newmod = __import__(newname) -# globals()[newname] = newmod - - -if not utils.PY3: - install_hooks() diff --git a/future/standard_library/test/regrtest.py b/future/standard_library/test/regrtest.py deleted file mode 100755 index 26f27ff3..00000000 --- a/future/standard_library/test/regrtest.py +++ /dev/null @@ -1,1564 +0,0 @@ -#! /usr/bin/python2.7 - -""" -Usage: - -python -m test.regrtest [options] [test_name1 [test_name2 ...]] -python path/to/Lib/test/regrtest.py [options] [test_name1 [test_name2 ...]] - - -If no arguments or options are provided, finds all files matching -the pattern "test_*" in the Lib/test subdirectory and runs -them in alphabetical order (but see -M and -u, below, for exceptions). - -For more rigorous testing, it is useful to use the following -command line: - -python -E -tt -Wd -3 -m test.regrtest [options] [test_name1 ...] - - -Options: - --h/--help -- print this text and exit - -Verbosity - --v/--verbose -- run tests in verbose mode with output to stdout --w/--verbose2 -- re-run failed tests in verbose mode --W/--verbose3 -- re-run failed tests in verbose mode immediately --q/--quiet -- no output unless one or more tests fail --S/--slow -- print the slowest 10 tests - --header -- print header with interpreter info - -Selecting tests - --r/--randomize -- randomize test execution order (see below) - --randseed -- pass a random seed to reproduce a previous random run --f/--fromfile -- read names of tests to run from a file (see below) --x/--exclude -- arguments are tests to *exclude* --s/--single -- single step through a set of tests (see below) --u/--use RES1,RES2,... - -- specify which special resource intensive tests to run --M/--memlimit LIMIT - -- run very large memory-consuming tests - -Special runs - --l/--findleaks -- if GC is available detect tests that leak memory --L/--runleaks -- run the leaks(1) command just before exit --R/--huntrleaks RUNCOUNTS - -- search for reference leaks (needs debug build, v. 
slow) --j/--multiprocess PROCESSES - -- run PROCESSES processes at once --T/--coverage -- turn on code coverage tracing using the trace module --D/--coverdir DIRECTORY - -- Directory where coverage files are put --N/--nocoverdir -- Put coverage files alongside modules --t/--threshold THRESHOLD - -- call gc.set_threshold(THRESHOLD) --F/--forever -- run the specified tests in a loop, until an error happens - - -Additional Option Details: - --r randomizes test execution order. You can use --randseed=int to provide a -int seed value for the randomizer; this is useful for reproducing troublesome -test orders. - --s On the first invocation of regrtest using -s, the first test file found -or the first test file given on the command line is run, and the name of -the next test is recorded in a file named pynexttest. If run from the -Python build directory, pynexttest is located in the 'build' subdirectory, -otherwise it is located in tempfile.gettempdir(). On subsequent runs, -the test in pynexttest is run, and the next test is written to pynexttest. -When the last test has been run, pynexttest is deleted. In this way it -is possible to single step through the test files. This is useful when -doing memory analysis on the Python interpreter, which process tends to -consume too many resources to run the full regression test non-stop. - --f reads the names of tests from the file given as f's argument, one -or more test names per line. Whitespace is ignored. Blank lines and -lines beginning with '#' are ignored. This is especially useful for -whittling down failures involving interactions among tests. - --L causes the leaks(1) command to be run just before exit if it exists. -leaks(1) is available on Mac OS X and presumably on some other -FreeBSD-derived systems. - --R runs each test several times and examines sys.gettotalrefcount() to -see if the test appears to be leaking references. 
The argument should -be of the form stab:run:fname where 'stab' is the number of times the -test is run to let gettotalrefcount settle down, 'run' is the number -of times further it is run and 'fname' is the name of the file the -reports are written to. These parameters all have defaults (5, 4 and -"reflog.txt" respectively), and the minimal invocation is '-R :'. - --M runs tests that require an exorbitant amount of memory. These tests -typically try to ascertain containers keep working when containing more than -2 billion objects, which only works on 64-bit systems. There are also some -tests that try to exhaust the address space of the process, which only makes -sense on 32-bit systems with at least 2Gb of memory. The passed-in memlimit, -which is a string in the form of '2.5Gb', determines howmuch memory the -tests will limit themselves to (but they may go slightly over.) The number -shouldn't be more memory than the machine has (including swap memory). You -should also keep in mind that swap memory is generally much, much slower -than RAM, and setting memlimit to all available RAM or higher will heavily -tax the machine. On the other hand, it is no use running these tests with a -limit of less than 2.5Gb, and many require more than 20Gb. Tests that expect -to use more than memlimit memory will be skipped. The big-memory tests -generally run very, very long. - --u is used to specify which special resource intensive tests to run, -such as those requiring large file support or network connectivity. -The argument is a comma-separated list of words indicating the -resources to test. Currently only the following are defined: - - all - Enable all special resources. - - audio - Tests that use the audio device. (There are known - cases of broken audio drivers that can crash Python or - even the Linux kernel.) - - curses - Tests that use curses and will modify the terminal's - state and output modes. 
- - largefile - It is okay to run some test that may create huge - files. These tests can take a long time and may - consume >2GB of disk space temporarily. - - network - It is okay to run tests that use external network - resource, e.g. testing SSL support for sockets. - - bsddb - It is okay to run the bsddb testsuite, which takes - a long time to complete. - - decimal - Test the decimal module against a large suite that - verifies compliance with standards. - - cpu - Used for certain CPU-heavy tests. - - subprocess Run all tests for the subprocess module. - - urlfetch - It is okay to download files required on testing. - - gui - Run tests that require a running GUI. - - xpickle - Test pickle and cPickle against Python 2.4, 2.5 and 2.6 to - test backwards compatibility. These tests take a long time - to run. - -To enable all resources except one, use '-uall,-'. For -example, to run all the tests except for the bsddb tests, give the -option '-uall,-bsddb'. -""" - -from __future__ import print_function - -import StringIO -import getopt -import json -import os -import random -import re -import shutil -import sys -import time -import traceback -import warnings -import unittest -import tempfile -import imp -import platform -import sysconfig - - -# Some times __path__ and __file__ are not absolute (e.g. while running from -# Lib/) and, if we change the CWD to run the tests in a temporary dir, some -# imports might fail. This affects only the modules imported before os.chdir(). -# These modules are searched first in sys.path[0] (so '' -- the CWD) and if -# they are found in the CWD their __file__ and __path__ will be relative (this -# happens before the chdir). All the modules imported after the chdir, are -# not found in the CWD, and since the other paths in sys.path[1:] are absolute -# (site.py absolutize them), the __file__ and __path__ will be absolute too. 
-# Therefore it is necessary to absolutize manually the __file__ and __path__ of -# the packages to prevent later imports to fail when the CWD is different. -for module in sys.modules.itervalues(): - if hasattr(module, '__path__'): - module.__path__ = [os.path.abspath(path) for path in module.__path__] - if hasattr(module, '__file__'): - module.__file__ = os.path.abspath(module.__file__) - - -# MacOSX (a.k.a. Darwin) has a default stack size that is too small -# for deeply recursive regular expressions. We see this as crashes in -# the Python test suite when running test_re.py and test_sre.py. The -# fix is to set the stack limit to 2048. -# This approach may also be useful for other Unixy platforms that -# suffer from small default stack limits. -if sys.platform == 'darwin': - try: - import resource - except ImportError: - pass - else: - soft, hard = resource.getrlimit(resource.RLIMIT_STACK) - newsoft = min(hard, max(soft, 1024*2048)) - resource.setrlimit(resource.RLIMIT_STACK, (newsoft, hard)) - -# Test result constants. -PASSED = 1 -FAILED = 0 -ENV_CHANGED = -1 -SKIPPED = -2 -RESOURCE_DENIED = -3 -INTERRUPTED = -4 - -from test import test_support - -RESOURCE_NAMES = ('audio', 'curses', 'largefile', 'network', 'bsddb', - 'decimal', 'cpu', 'subprocess', 'urlfetch', 'gui', - 'xpickle') - -TEMPDIR = os.path.abspath(tempfile.gettempdir()) - - -def usage(code, msg=''): - print(__doc__) - if msg: print(msg) - sys.exit(code) - - -def main(tests=None, testdir=None, verbose=0, quiet=False, - exclude=False, single=False, randomize=False, fromfile=None, - findleaks=False, use_resources=None, trace=False, coverdir='coverage', - runleaks=False, huntrleaks=False, verbose2=False, print_slow=False, - random_seed=None, use_mp=None, verbose3=False, forever=False, - header=False): - """Execute a test suite. - - This also parses command-line options and modifies its behavior - accordingly. 
- - tests -- a list of strings containing test names (optional) - testdir -- the directory in which to look for tests (optional) - - Users other than the Python test suite will certainly want to - specify testdir; if it's omitted, the directory containing the - Python test suite is searched for. - - If the tests argument is omitted, the tests listed on the - command-line will be used. If that's empty, too, then all *.py - files beginning with test_ will be used. - - The other default arguments (verbose, quiet, exclude, - single, randomize, findleaks, use_resources, trace, coverdir, - print_slow, and random_seed) allow programmers calling main() - directly to set the values that would normally be set by flags - on the command line. - """ - - test_support.record_original_stdout(sys.stdout) - try: - opts, args = getopt.getopt(sys.argv[1:], 'hvqxsSrf:lu:t:TD:NLR:FwWM:j:', - ['help', 'verbose', 'verbose2', 'verbose3', 'quiet', - 'exclude', 'single', 'slow', 'randomize', 'fromfile=', 'findleaks', - 'use=', 'threshold=', 'trace', 'coverdir=', 'nocoverdir', - 'runleaks', 'huntrleaks=', 'memlimit=', 'randseed=', - 'multiprocess=', 'slaveargs=', 'forever', 'header']) - except getopt.error as msg: - usage(2, msg) - - # Defaults - if random_seed is None: - random_seed = random.randrange(10000000) - if use_resources is None: - use_resources = [] - for o, a in opts: - if o in ('-h', '--help'): - usage(0) - elif o in ('-v', '--verbose'): - verbose += 1 - elif o in ('-w', '--verbose2'): - verbose2 = True - elif o in ('-W', '--verbose3'): - verbose3 = True - elif o in ('-q', '--quiet'): - quiet = True; - verbose = 0 - elif o in ('-x', '--exclude'): - exclude = True - elif o in ('-s', '--single'): - single = True - elif o in ('-S', '--slow'): - print_slow = True - elif o in ('-r', '--randomize'): - randomize = True - elif o == '--randseed': - random_seed = int(a) - elif o in ('-f', '--fromfile'): - fromfile = a - elif o in ('-l', '--findleaks'): - findleaks = True - elif o in ('-L', 
'--runleaks'): - runleaks = True - elif o in ('-t', '--threshold'): - import gc - gc.set_threshold(int(a)) - elif o in ('-T', '--coverage'): - trace = True - elif o in ('-D', '--coverdir'): - coverdir = os.path.join(os.getcwd(), a) - elif o in ('-N', '--nocoverdir'): - coverdir = None - elif o in ('-R', '--huntrleaks'): - huntrleaks = a.split(':') - if len(huntrleaks) not in (2, 3): - print(a, huntrleaks) - usage(2, '-R takes 2 or 3 colon-separated arguments') - if not huntrleaks[0]: - huntrleaks[0] = 5 - else: - huntrleaks[0] = int(huntrleaks[0]) - if not huntrleaks[1]: - huntrleaks[1] = 4 - else: - huntrleaks[1] = int(huntrleaks[1]) - if len(huntrleaks) == 2 or not huntrleaks[2]: - huntrleaks[2:] = ["reflog.txt"] - elif o in ('-M', '--memlimit'): - test_support.set_memlimit(a) - elif o in ('-u', '--use'): - u = [x.lower() for x in a.split(',')] - for r in u: - if r == 'all': - use_resources[:] = RESOURCE_NAMES - continue - remove = False - if r[0] == '-': - remove = True - r = r[1:] - if r not in RESOURCE_NAMES: - usage(1, 'Invalid -u/--use option: ' + a) - if remove: - if r in use_resources: - use_resources.remove(r) - elif r not in use_resources: - use_resources.append(r) - elif o in ('-F', '--forever'): - forever = True - elif o in ('-j', '--multiprocess'): - use_mp = int(a) - elif o == '--header': - header = True - elif o == '--slaveargs': - args, kwargs = json.loads(a) - try: - result = runtest(*args, **kwargs) - except BaseException as e: - result = INTERRUPTED, e.__class__.__name__ - print() # Force a newline (just in case) - print(json.dumps(result)) - sys.exit(0) - else: - print(("No handler for option {0}. 
Please " - "report this as a bug at http://bugs.python.org.").format(o), file=sys.stderr) - sys.exit(1) - if single and fromfile: - usage(2, "-s and -f don't go together!") - if use_mp and trace: - usage(2, "-T and -j don't go together!") - if use_mp and findleaks: - usage(2, "-l and -j don't go together!") - - good = [] - bad = [] - skipped = [] - resource_denieds = [] - environment_changed = [] - interrupted = False - - if findleaks: - try: - import gc - except ImportError: - print('No GC available, disabling findleaks.') - findleaks = False - else: - # Uncomment the line below to report garbage that is not - # freeable by reference counting alone. By default only - # garbage that is not collectable by the GC is reported. - #gc.set_debug(gc.DEBUG_SAVEALL) - found_garbage = [] - - if single: - filename = os.path.join(TEMPDIR, 'pynexttest') - try: - fp = open(filename, 'r') - next_test = fp.read().strip() - tests = [next_test] - fp.close() - except IOError: - pass - - if fromfile: - tests = [] - fp = open(os.path.join(test_support.SAVEDCWD, fromfile)) - for line in fp: - guts = line.split() # assuming no test has whitespace in its name - if guts and not guts[0].startswith('#'): - tests.extend(guts) - fp.close() - - # Strip .py extensions. - removepy(args) - removepy(tests) - - stdtests = STDTESTS[:] - nottests = NOTTESTS.copy() - if exclude: - for arg in args: - if arg in stdtests: - stdtests.remove(arg) - nottests.add(arg) - args = [] - - # For a partial run, we do not need to clutter the output. 
- if verbose or header or not (quiet or single or tests or args): - # Print basic platform information - print("==", platform.python_implementation(), \ - " ".join(sys.version.split())) - print("== ", platform.platform(aliased=True), \ - "%s-endian" % sys.byteorder) - print("== ", os.getcwd()) - print("Testing with flags:", sys.flags) - - alltests = findtests(testdir, stdtests, nottests) - selected = tests or args or alltests - if single: - selected = selected[:1] - try: - next_single_test = alltests[alltests.index(selected[0])+1] - except IndexError: - next_single_test = None - if randomize: - random.seed(random_seed) - print("Using random seed", random_seed) - random.shuffle(selected) - if trace: - import trace - tracer = trace.Trace(ignoredirs=[sys.prefix, sys.exec_prefix], - trace=False, count=True) - - test_times = [] - test_support.use_resources = use_resources - save_modules = sys.modules.keys() - - def accumulate_result(test, result): - ok, test_time = result - test_times.append((test_time, test)) - if ok == PASSED: - good.append(test) - elif ok == FAILED: - bad.append(test) - elif ok == ENV_CHANGED: - bad.append(test) - environment_changed.append(test) - elif ok == SKIPPED: - skipped.append(test) - elif ok == RESOURCE_DENIED: - skipped.append(test) - resource_denieds.append(test) - - if forever: - def test_forever(tests=list(selected)): - while True: - for test in tests: - yield test - if bad: - return - tests = test_forever() - else: - tests = iter(selected) - - if use_mp: - try: - from threading import Thread - except ImportError: - print("Multiprocess option requires thread support") - sys.exit(2) - from Queue import Queue - from subprocess import Popen, PIPE - debug_output_pat = re.compile(r"\[\d+ refs\]$") - output = Queue() - def tests_and_args(): - for test in tests: - args_tuple = ( - (test, verbose, quiet), - dict(huntrleaks=huntrleaks, use_resources=use_resources) - ) - yield (test, args_tuple) - pending = tests_and_args() - opt_args = 
test_support.args_from_interpreter_flags() - base_cmd = [sys.executable] + opt_args + ['-m', 'test.regrtest'] - def work(): - # A worker thread. - try: - while True: - try: - test, args_tuple = next(pending) - except StopIteration: - output.put((None, None, None, None)) - return - # -E is needed by some tests, e.g. test_import - popen = Popen(base_cmd + ['--slaveargs', json.dumps(args_tuple)], - stdout=PIPE, stderr=PIPE, - universal_newlines=True, - close_fds=(os.name != 'nt')) - stdout, stderr = popen.communicate() - # Strip last refcount output line if it exists, since it - # comes from the shutdown of the interpreter in the subcommand. - stderr = debug_output_pat.sub("", stderr) - stdout, _, result = stdout.strip().rpartition("\n") - if not result: - output.put((None, None, None, None)) - return - result = json.loads(result) - if not quiet: - stdout = test+'\n'+stdout - output.put((test, stdout.rstrip(), stderr.rstrip(), result)) - except BaseException: - output.put((None, None, None, None)) - raise - workers = [Thread(target=work) for i in range(use_mp)] - for worker in workers: - worker.start() - finished = 0 - try: - while finished < use_mp: - test, stdout, stderr, result = output.get() - if test is None: - finished += 1 - continue - if stdout: - print(stdout) - if stderr: - print(stderr, file=sys.stderr) - sys.stdout.flush() - sys.stderr.flush() - if result[0] == INTERRUPTED: - assert result[1] == 'KeyboardInterrupt' - raise KeyboardInterrupt # What else? - accumulate_result(test, result) - except KeyboardInterrupt: - interrupted = True - pending.close() - for worker in workers: - worker.join() - else: - for test in tests: - if not quiet: - print(test) - sys.stdout.flush() - if trace: - # If we're tracing code coverage, then we don't exit with status - # if on a false return value from main. 
- tracer.runctx('runtest(test, verbose, quiet)', - globals=globals(), locals=vars()) - else: - try: - result = runtest(test, verbose, quiet, huntrleaks) - accumulate_result(test, result) - if verbose3 and result[0] == FAILED: - print("Re-running test %r in verbose mode" % test) - runtest(test, True, quiet, huntrleaks) - except KeyboardInterrupt: - interrupted = True - break - except: - raise - if findleaks: - gc.collect() - if gc.garbage: - print("Warning: test created", len(gc.garbage), end=' ') - print("uncollectable object(s).") - # move the uncollectable objects somewhere so we don't see - # them again - found_garbage.extend(gc.garbage) - del gc.garbage[:] - # Unload the newly imported modules (best effort finalization) - for module in sys.modules.keys(): - if module not in save_modules and module.startswith("test."): - test_support.unload(module) - - if interrupted: - # print a newline after ^C - print() - print("Test suite interrupted by signal SIGINT.") - omitted = set(selected) - set(good) - set(bad) - set(skipped) - print(count(len(omitted), "test"), "omitted:") - printlist(omitted) - if good and not quiet: - if not bad and not skipped and not interrupted and len(good) > 1: - print("All", end=' ') - print(count(len(good), "test"), "OK.") - if print_slow: - test_times.sort(reverse=True) - print("10 slowest tests:") - for time, test in test_times[:10]: - print("%s: %.1fs" % (test, time)) - if bad: - bad = set(bad) - set(environment_changed) - if bad: - print(count(len(bad), "test"), "failed:") - printlist(bad) - if environment_changed: - print("{0} altered the execution environment:".format( - count(len(environment_changed), "test"))) - printlist(environment_changed) - if skipped and not quiet: - print(count(len(skipped), "test"), "skipped:") - printlist(skipped) - - e = _ExpectedSkips() - plat = sys.platform - if e.isvalid(): - surprise = set(skipped) - e.getexpected() - set(resource_denieds) - if surprise: - print(count(len(surprise), "skip"), \ - 
"unexpected on", plat + ":") - printlist(surprise) - else: - print("Those skips are all expected on", plat + ".") - else: - print("Ask someone to teach regrtest.py about which tests are") - print("expected to get skipped on", plat + ".") - - if verbose2 and bad: - print("Re-running failed tests in verbose mode") - for test in bad: - print("Re-running test %r in verbose mode" % test) - sys.stdout.flush() - try: - test_support.verbose = True - ok = runtest(test, True, quiet, huntrleaks) - except KeyboardInterrupt: - # print a newline separate from the ^C - print() - break - except: - raise - - if single: - if next_single_test: - with open(filename, 'w') as fp: - fp.write(next_single_test + '\n') - else: - os.unlink(filename) - - if trace: - r = tracer.results() - r.write_results(show_missing=True, summary=True, coverdir=coverdir) - - if runleaks: - os.system("leaks %d" % os.getpid()) - - sys.exit(len(bad) > 0 or interrupted) - - -STDTESTS = [ - 'test_grammar', - 'test_opcodes', - 'test_dict', - 'test_builtin', - 'test_exceptions', - 'test_types', - 'test_unittest', - 'test_doctest', - 'test_doctest2', -] - -NOTTESTS = set([ - 'test_support', - 'test_future1', - 'test_future2', -]) - -def findtests(testdir=None, stdtests=STDTESTS, nottests=NOTTESTS): - """Return a list of all applicable test modules.""" - testdir = findtestdir(testdir) - names = os.listdir(testdir) - tests = [] - others = set(stdtests) | nottests - for name in names: - modname, ext = os.path.splitext(name) - if modname[:5] == "test_" and ext == ".py" and modname not in others: - tests.append(modname) - return stdtests + sorted(tests) - -def runtest(test, verbose, quiet, - huntrleaks=False, use_resources=None): - """Run a single test. 
- - test -- the name of the test - verbose -- if true, print more messages - quiet -- if true, don't print 'skipped' messages (probably redundant) - test_times -- a list of (time, test_name) pairs - huntrleaks -- run multiple times to test for leaks; requires a debug - build; a triple corresponding to -R's three arguments - Returns one of the test result constants: - INTERRUPTED KeyboardInterrupt when run under -j - RESOURCE_DENIED test skipped because resource denied - SKIPPED test skipped for some other reason - ENV_CHANGED test failed because it changed the execution environment - FAILED test failed - PASSED test passed - """ - - test_support.verbose = verbose # Tell tests to be moderately quiet - if use_resources is not None: - test_support.use_resources = use_resources - try: - return runtest_inner(test, verbose, quiet, huntrleaks) - finally: - cleanup_test_droppings(test, verbose) - - -# Unit tests are supposed to leave the execution environment unchanged -# once they complete. But sometimes tests have bugs, especially when -# tests fail, and the changes to environment go on to mess up other -# tests. This can cause issues with buildbot stability, since tests -# are run in random order and so problems may appear to come and go. -# There are a few things we can save and restore to mitigate this, and -# the following context manager handles this task. - -class saved_test_environment(object): - """Save bits of the test environment and restore them at block exit. - - with saved_test_environment(testname, verbose, quiet): - #stuff - - Unless quiet is True, a warning is printed to stderr if any of - the saved items was changed by the test. The attribute 'changed' - is initially False, but is set to True if a change is detected. - - If verbose is more than 1, the before and after state of changed - items is also printed. 
- """ - - changed = False - - def __init__(self, testname, verbose=0, quiet=False): - self.testname = testname - self.verbose = verbose - self.quiet = quiet - - # To add things to save and restore, add a name XXX to the resources list - # and add corresponding get_XXX/restore_XXX functions. get_XXX should - # return the value to be saved and compared against a second call to the - # get function when test execution completes. restore_XXX should accept - # the saved value and restore the resource using it. It will be called if - # and only if a change in the value is detected. - # - # Note: XXX will have any '.' replaced with '_' characters when determining - # the corresponding method names. - - resources = ('sys.argv', 'cwd', 'sys.stdin', 'sys.stdout', 'sys.stderr', - 'os.environ', 'sys.path', 'asyncore.socket_map', - 'test_support.TESTFN', - ) - - def get_sys_argv(self): - return id(sys.argv), sys.argv, sys.argv[:] - def restore_sys_argv(self, saved_argv): - sys.argv = saved_argv[1] - sys.argv[:] = saved_argv[2] - - def get_cwd(self): - return os.getcwd() - def restore_cwd(self, saved_cwd): - os.chdir(saved_cwd) - - def get_sys_stdout(self): - return sys.stdout - def restore_sys_stdout(self, saved_stdout): - sys.stdout = saved_stdout - - def get_sys_stderr(self): - return sys.stderr - def restore_sys_stderr(self, saved_stderr): - sys.stderr = saved_stderr - - def get_sys_stdin(self): - return sys.stdin - def restore_sys_stdin(self, saved_stdin): - sys.stdin = saved_stdin - - def get_os_environ(self): - return id(os.environ), os.environ, dict(os.environ) - def restore_os_environ(self, saved_environ): - os.environ = saved_environ[1] - os.environ.clear() - os.environ.update(saved_environ[2]) - - def get_sys_path(self): - return id(sys.path), sys.path, sys.path[:] - def restore_sys_path(self, saved_path): - sys.path = saved_path[1] - sys.path[:] = saved_path[2] - - def get_asyncore_socket_map(self): - asyncore = sys.modules.get('asyncore') - # XXX Making a copy keeps 
objects alive until __exit__ gets called. - return asyncore and asyncore.socket_map.copy() or {} - def restore_asyncore_socket_map(self, saved_map): - asyncore = sys.modules.get('asyncore') - if asyncore is not None: - asyncore.close_all(ignore_all=True) - asyncore.socket_map.update(saved_map) - - def get_test_support_TESTFN(self): - if os.path.isfile(test_support.TESTFN): - result = 'f' - elif os.path.isdir(test_support.TESTFN): - result = 'd' - else: - result = None - return result - def restore_test_support_TESTFN(self, saved_value): - if saved_value is None: - if os.path.isfile(test_support.TESTFN): - os.unlink(test_support.TESTFN) - elif os.path.isdir(test_support.TESTFN): - shutil.rmtree(test_support.TESTFN) - - def resource_info(self): - for name in self.resources: - method_suffix = name.replace('.', '_') - get_name = 'get_' + method_suffix - restore_name = 'restore_' + method_suffix - yield name, getattr(self, get_name), getattr(self, restore_name) - - def __enter__(self): - self.saved_values = dict((name, get()) for name, get, restore - in self.resource_info()) - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - saved_values = self.saved_values - del self.saved_values - for name, get, restore in self.resource_info(): - current = get() - original = saved_values.pop(name) - # Check for changes to the resource's value - if current != original: - self.changed = True - restore(original) - if not self.quiet: - print(( - "Warning -- {0} was modified by {1}".format( - name, self.testname)), file=sys.stderr) - if self.verbose > 1: - print(( - " Before: {0}\n After: {1} ".format( - original, current)), file=sys.stderr) - # XXX (ncoghlan): for most resources (e.g. sys.path) identity - # matters at least as much as value. For others (e.g. cwd), - # identity is irrelevant. Should we add a mechanism to check - # for substitution in the cases where it matters? 
- return False - - -def runtest_inner(test, verbose, quiet, huntrleaks=False): - test_support.unload(test) - if verbose: - capture_stdout = None - else: - capture_stdout = StringIO.StringIO() - - test_time = 0.0 - refleak = False # True if the test leaked references. - try: - save_stdout = sys.stdout - try: - if capture_stdout: - sys.stdout = capture_stdout - if test.startswith('test.'): - abstest = test - else: - # Always import it from the test package - abstest = 'test.' + test - with saved_test_environment(test, verbose, quiet) as environment: - start_time = time.time() - the_package = __import__(abstest, globals(), locals(), []) - the_module = getattr(the_package, test) - # Old tests run to completion simply as a side-effect of - # being imported. For tests based on unittest or doctest, - # explicitly invoke their test_main() function (if it exists). - indirect_test = getattr(the_module, "test_main", None) - if indirect_test is not None: - indirect_test() - if huntrleaks: - refleak = dash_R(the_module, test, indirect_test, - huntrleaks) - test_time = time.time() - start_time - finally: - sys.stdout = save_stdout - except test_support.ResourceDenied as msg: - if not quiet: - print(test, "skipped --", msg) - sys.stdout.flush() - return RESOURCE_DENIED, test_time - except unittest.SkipTest as msg: - if not quiet: - print(test, "skipped --", msg) - sys.stdout.flush() - return SKIPPED, test_time - except KeyboardInterrupt: - raise - except test_support.TestFailed as msg: - print("test", test, "failed --", msg, file=sys.stderr) - sys.stderr.flush() - return FAILED, test_time - except: - type, value = sys.exc_info()[:2] - print("test", test, "crashed --", str(type) + ":", value, file=sys.stderr) - sys.stderr.flush() - if verbose: - traceback.print_exc(file=sys.stderr) - sys.stderr.flush() - return FAILED, test_time - else: - if refleak: - return FAILED, test_time - if environment.changed: - return ENV_CHANGED, test_time - # Except in verbose mode, tests should not 
print anything - if verbose or huntrleaks: - return PASSED, test_time - output = capture_stdout.getvalue() - if not output: - return PASSED, test_time - print("test", test, "produced unexpected output:") - print("*" * 70) - print(output) - print("*" * 70) - sys.stdout.flush() - return FAILED, test_time - -def cleanup_test_droppings(testname, verbose): - import stat - import gc - - # First kill any dangling references to open files etc. - gc.collect() - - # Try to clean up junk commonly left behind. While tests shouldn't leave - # any files or directories behind, when a test fails that can be tedious - # for it to arrange. The consequences can be especially nasty on Windows, - # since if a test leaves a file open, it cannot be deleted by name (while - # there's nothing we can do about that here either, we can display the - # name of the offending test, which is a real help). - for name in (test_support.TESTFN, - "db_home", - ): - if not os.path.exists(name): - continue - - if os.path.isdir(name): - kind, nuker = "directory", shutil.rmtree - elif os.path.isfile(name): - kind, nuker = "file", os.unlink - else: - raise SystemError("os.path says %r exists but is neither " - "directory nor file" % name) - - if verbose: - print("%r left behind %s %r" % (testname, kind, name)) - try: - # if we have chmod, fix possible permissions problems - # that might prevent cleanup - if (hasattr(os, 'chmod')): - os.chmod(name, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO) - nuker(name) - except Exception as msg: - print(("%r left behind %s %r and it couldn't be " - "removed: %s" % (testname, kind, name, msg)), file=sys.stderr) - -def dash_R(the_module, test, indirect_test, huntrleaks): - """Run a test multiple times, looking for reference leaks. - - Returns: - False if the test didn't leak references; True if we detected refleaks. - """ - # This code is hackish and inelegant, but it seems to do the job. 
- import copy_reg, _abcoll, _pyio - - if not hasattr(sys, 'gettotalrefcount'): - raise Exception("Tracking reference leaks requires a debug build " - "of Python") - - # Save current values for dash_R_cleanup() to restore. - fs = warnings.filters[:] - ps = copy_reg.dispatch_table.copy() - pic = sys.path_importer_cache.copy() - try: - import zipimport - except ImportError: - zdc = None # Run unmodified on platforms without zipimport support - else: - zdc = zipimport._zip_directory_cache.copy() - abcs = {} - modules = _abcoll, _pyio - for abc in [getattr(mod, a) for mod in modules for a in mod.__all__]: - # XXX isinstance(abc, ABCMeta) leads to infinite recursion - if not hasattr(abc, '_abc_registry'): - continue - for obj in abc.__subclasses__() + [abc]: - abcs[obj] = obj._abc_registry.copy() - - if indirect_test: - def run_the_test(): - indirect_test() - else: - def run_the_test(): - imp.reload(the_module) - - deltas = [] - nwarmup, ntracked, fname = huntrleaks - fname = os.path.join(test_support.SAVEDCWD, fname) - repcount = nwarmup + ntracked - print("beginning", repcount, "repetitions", file=sys.stderr) - print(("1234567890"*(repcount//10 + 1))[:repcount], file=sys.stderr) - dash_R_cleanup(fs, ps, pic, zdc, abcs) - for i in range(repcount): - rc_before = sys.gettotalrefcount() - run_the_test() - sys.stderr.write('.') - dash_R_cleanup(fs, ps, pic, zdc, abcs) - rc_after = sys.gettotalrefcount() - if i >= nwarmup: - deltas.append(rc_after - rc_before) - print(file=sys.stderr) - if any(deltas): - msg = '%s leaked %s references, sum=%s' % (test, deltas, sum(deltas)) - print(msg, file=sys.stderr) - with open(fname, "a") as refrep: - print(msg, file=refrep) - refrep.flush() - return True - return False - -def dash_R_cleanup(fs, ps, pic, zdc, abcs): - import gc, copy_reg - import _strptime, linecache - dircache = test_support.import_module('dircache', deprecated=True) - import urlparse, urllib, urllib2, mimetypes, doctest - import struct, filecmp - from 
distutils.dir_util import _path_created - - # Clear the warnings registry, so they can be displayed again - for mod in sys.modules.values(): - if hasattr(mod, '__warningregistry__'): - del mod.__warningregistry__ - - # Restore some original values. - warnings.filters[:] = fs - copy_reg.dispatch_table.clear() - copy_reg.dispatch_table.update(ps) - sys.path_importer_cache.clear() - sys.path_importer_cache.update(pic) - try: - import zipimport - except ImportError: - pass # Run unmodified on platforms without zipimport support - else: - zipimport._zip_directory_cache.clear() - zipimport._zip_directory_cache.update(zdc) - - # clear type cache - sys._clear_type_cache() - - # Clear ABC registries, restoring previously saved ABC registries. - for abc, registry in abcs.items(): - abc._abc_registry = registry.copy() - abc._abc_cache.clear() - abc._abc_negative_cache.clear() - - # Clear assorted module caches. - _path_created.clear() - re.purge() - _strptime._regex_cache.clear() - urlparse.clear_cache() - urllib.urlcleanup() - urllib2.install_opener(None) - dircache.reset() - linecache.clearcache() - mimetypes._default_mime_types() - filecmp._cache.clear() - struct._clearcache() - doctest.master = None - try: - import ctypes - except ImportError: - # Don't worry about resetting the cache if ctypes is not supported - pass - else: - ctypes._reset_cache() - - # Collect cyclic trash. - gc.collect() - -def findtestdir(path=None): - return path or os.path.dirname(__file__) or os.curdir - -def removepy(names): - if not names: - return - for idx, name in enumerate(names): - basename, ext = os.path.splitext(name) - if ext == '.py': - names[idx] = basename - -def count(n, word): - if n == 1: - return "%d %s" % (n, word) - else: - return "%d %ss" % (n, word) - -def printlist(x, width=70, indent=4): - """Print the elements of iterable x to stdout. - - Optional arg width (default 70) is the maximum line length. 
- Optional arg indent (default 4) is the number of blanks with which to - begin each line. - """ - - from textwrap import fill - blanks = ' ' * indent - # Print the sorted list: 'x' may be a '--random' list or a set() - print(fill(' '.join(str(elt) for elt in sorted(x)), width, - initial_indent=blanks, subsequent_indent=blanks)) - -# Map sys.platform to a string containing the basenames of tests -# expected to be skipped on that platform. -# -# Special cases: -# test_pep277 -# The _ExpectedSkips constructor adds this to the set of expected -# skips if not os.path.supports_unicode_filenames. -# test_timeout -# Controlled by test_timeout.skip_expected. Requires the network -# resource and a socket module. -# -# Tests that are expected to be skipped everywhere except on one platform -# are also handled separately. - -_expectations = { - 'win32': - """ - test__locale - test_bsddb185 - test_bsddb3 - test_commands - test_crypt - test_curses - test_dbm - test_dl - test_fcntl - test_fork1 - test_epoll - test_gdbm - test_grp - test_ioctl - test_largefile - test_kqueue - test_mhlib - test_openpty - test_ossaudiodev - test_pipes - test_poll - test_posix - test_pty - test_pwd - test_resource - test_signal - test_threadsignals - test_timing - test_wait3 - test_wait4 - """, - 'linux2': - """ - test_bsddb185 - test_curses - test_dl - test_largefile - test_kqueue - test_ossaudiodev - """, - 'unixware7': - """ - test_bsddb - test_bsddb185 - test_dl - test_epoll - test_largefile - test_kqueue - test_minidom - test_openpty - test_pyexpat - test_sax - test_sundry - """, - 'openunix8': - """ - test_bsddb - test_bsddb185 - test_dl - test_epoll - test_largefile - test_kqueue - test_minidom - test_openpty - test_pyexpat - test_sax - test_sundry - """, - 'sco_sv3': - """ - test_asynchat - test_bsddb - test_bsddb185 - test_dl - test_fork1 - test_epoll - test_gettext - test_largefile - test_locale - test_kqueue - test_minidom - test_openpty - test_pyexpat - test_queue - test_sax - 
test_sundry - test_thread - test_threaded_import - test_threadedtempfile - test_threading - """, - 'riscos': - """ - test_asynchat - test_atexit - test_bsddb - test_bsddb185 - test_bsddb3 - test_commands - test_crypt - test_dbm - test_dl - test_fcntl - test_fork1 - test_epoll - test_gdbm - test_grp - test_largefile - test_locale - test_kqueue - test_mmap - test_openpty - test_poll - test_popen2 - test_pty - test_pwd - test_strop - test_sundry - test_thread - test_threaded_import - test_threadedtempfile - test_threading - test_timing - """, - 'darwin': - """ - test__locale - test_bsddb - test_bsddb3 - test_curses - test_epoll - test_gdb - test_gdbm - test_largefile - test_locale - test_kqueue - test_minidom - test_ossaudiodev - test_poll - """, - 'sunos5': - """ - test_bsddb - test_bsddb185 - test_curses - test_dbm - test_epoll - test_kqueue - test_gdbm - test_gzip - test_openpty - test_zipfile - test_zlib - """, - 'hp-ux11': - """ - test_bsddb - test_bsddb185 - test_curses - test_dl - test_epoll - test_gdbm - test_gzip - test_largefile - test_locale - test_kqueue - test_minidom - test_openpty - test_pyexpat - test_sax - test_zipfile - test_zlib - """, - 'atheos': - """ - test_bsddb185 - test_curses - test_dl - test_gdbm - test_epoll - test_largefile - test_locale - test_kqueue - test_mhlib - test_mmap - test_poll - test_popen2 - test_resource - """, - 'cygwin': - """ - test_bsddb185 - test_bsddb3 - test_curses - test_dbm - test_epoll - test_ioctl - test_kqueue - test_largefile - test_locale - test_ossaudiodev - test_socketserver - """, - 'os2emx': - """ - test_audioop - test_bsddb185 - test_bsddb3 - test_commands - test_curses - test_dl - test_epoll - test_kqueue - test_largefile - test_mhlib - test_mmap - test_openpty - test_ossaudiodev - test_pty - test_resource - test_signal - """, - 'freebsd4': - """ - test_bsddb - test_bsddb3 - test_epoll - test_gdbm - test_locale - test_ossaudiodev - test_pep277 - test_pty - test_socketserver - test_tcl - test_tk - 
test_ttk_guionly - test_ttk_textonly - test_timeout - test_urllibnet - test_multiprocessing - """, - 'aix5': - """ - test_bsddb - test_bsddb185 - test_bsddb3 - test_bz2 - test_dl - test_epoll - test_gdbm - test_gzip - test_kqueue - test_ossaudiodev - test_tcl - test_tk - test_ttk_guionly - test_ttk_textonly - test_zipimport - test_zlib - """, - 'openbsd3': - """ - test_ascii_formatd - test_bsddb - test_bsddb3 - test_ctypes - test_dl - test_epoll - test_gdbm - test_locale - test_normalization - test_ossaudiodev - test_pep277 - test_tcl - test_tk - test_ttk_guionly - test_ttk_textonly - test_multiprocessing - """, - 'netbsd3': - """ - test_ascii_formatd - test_bsddb - test_bsddb185 - test_bsddb3 - test_ctypes - test_curses - test_dl - test_epoll - test_gdbm - test_locale - test_ossaudiodev - test_pep277 - test_tcl - test_tk - test_ttk_guionly - test_ttk_textonly - test_multiprocessing - """, -} -_expectations['freebsd5'] = _expectations['freebsd4'] -_expectations['freebsd6'] = _expectations['freebsd4'] -_expectations['freebsd7'] = _expectations['freebsd4'] -_expectations['freebsd8'] = _expectations['freebsd4'] - -class _ExpectedSkips(object): - def __init__(self): - import os.path - from test import test_timeout - - self.valid = False - if sys.platform in _expectations: - s = _expectations[sys.platform] - self.expected = set(s.split()) - - # expected to be skipped on every platform, even Linux - self.expected.add('test_linuxaudiodev') - - if not os.path.supports_unicode_filenames: - self.expected.add('test_pep277') - - if test_timeout.skip_expected: - self.expected.add('test_timeout') - - if sys.maxint == 9223372036854775807: - self.expected.add('test_imageop') - - if sys.platform != "darwin": - MAC_ONLY = ["test_macos", "test_macostools", "test_aepack", - "test_plistlib", "test_scriptpackages", - "test_applesingle"] - for skip in MAC_ONLY: - self.expected.add(skip) - elif len(u'\0'.encode('unicode-internal')) == 4: - self.expected.add("test_macostools") - - - if 
sys.platform != "win32": - # test_sqlite is only reliable on Windows where the library - # is distributed with Python - WIN_ONLY = ["test_unicode_file", "test_winreg", - "test_winsound", "test_startfile", - "test_sqlite", "test_msilib"] - for skip in WIN_ONLY: - self.expected.add(skip) - - if sys.platform != 'irix': - IRIX_ONLY = ["test_imageop", "test_al", "test_cd", "test_cl", - "test_gl", "test_imgfile"] - for skip in IRIX_ONLY: - self.expected.add(skip) - - if sys.platform != 'sunos5': - self.expected.add('test_sunaudiodev') - self.expected.add('test_nis') - - if not sys.py3kwarning: - self.expected.add('test_py3kwarn') - - self.valid = True - - def isvalid(self): - "Return true iff _ExpectedSkips knows about the current platform." - return self.valid - - def getexpected(self): - """Return set of test names we expect to skip on current platform. - - self.isvalid() must be true. - """ - - assert self.isvalid() - return self.expected - -if __name__ == '__main__': - # findtestdir() gets the dirname out of __file__, so we have to make it - # absolute before changing the working directory. - # For example __file__ may be relative when running trace or profile. - # See issue #9323. - __file__ = os.path.abspath(__file__) - - # sanity check - assert __file__ == os.path.abspath(sys.argv[0]) - - # When tests are run from the Python build directory, it is best practice - # to keep the test files in a subfolder. It eases the cleanup of leftover - # files using command "make distclean". - if sysconfig.is_python_build(): - TEMPDIR = os.path.join(sysconfig.get_config_var('srcdir'), 'build') - TEMPDIR = os.path.abspath(TEMPDIR) - if not os.path.exists(TEMPDIR): - os.mkdir(TEMPDIR) - - # Define a writable temp dir that will be used as cwd while running - # the tests. The name of the dir includes the pid to allow parallel - # testing (see the -j option). 
- TESTCWD = 'test_python_{0}'.format(os.getpid()) - - TESTCWD = os.path.join(TEMPDIR, TESTCWD) - - # Run the tests in a context manager that temporary changes the CWD to a - # temporary and writable directory. If it's not possible to create or - # change the CWD, the original CWD will be used. The original CWD is - # available from test_support.SAVEDCWD. - with test_support.temp_cwd(TESTCWD, quiet=True): - main() diff --git a/future/standard_library/test/string_tests.py b/future/standard_library/test/string_tests.py deleted file mode 100644 index 96f3ea94..00000000 --- a/future/standard_library/test/string_tests.py +++ /dev/null @@ -1,1392 +0,0 @@ -""" -Common tests shared by test_str, test_unicode, test_userstring and test_string. -""" -from __future__ import (absolute_import, division, - print_function, unicode_literals) -from future import standard_library -from future.builtins import * - -import string -import sys -import struct -from test import support -from collections import UserList -import _testcapi - -class Sequence(object): - def __init__(self, seq='wxyz'): self.seq = seq - def __len__(self): return len(self.seq) - def __getitem__(self, i): return self.seq[i] - -class BadSeq1(Sequence): - def __init__(self): self.seq = [7, 'hello', 123] - def __str__(self): return '{0} {1} {2}'.format(*self.seq) - -class BadSeq2(Sequence): - def __init__(self): self.seq = ['a', 'b', 'c'] - def __len__(self): return 8 - -class BaseTest(object): - # These tests are for buffers of values (bytes) and not - # specific to character interpretation, used for bytes objects - # and various string implementations - - # The type to be tested - # Change in subclasses to change the behaviour of fixtesttype() - type2test = None - - # Whether the "contained items" of the container are integers in - # range(0, 256) (i.e. bytes, bytearray) or strings of length 1 - # (str) - contains_bytes = False - - # All tests pass their arguments to the testing methods - # as str objects. 
fixtesttype() can be used to propagate - # these arguments to the appropriate type - def fixtype(self, obj): - if isinstance(obj, str): - return self.__class__.type2test(obj) - elif isinstance(obj, list): - return [self.fixtype(x) for x in obj] - elif isinstance(obj, tuple): - return tuple([self.fixtype(x) for x in obj]) - elif isinstance(obj, dict): - return dict([ - (self.fixtype(key), self.fixtype(value)) - for (key, value) in obj.items() - ]) - else: - return obj - - # check that obj.method(*args) returns result - def checkequal(self, result, obj, methodname, *args, **kwargs): - result = self.fixtype(result) - obj = self.fixtype(obj) - args = self.fixtype(args) - kwargs = dict((k, self.fixtype(v)) for k,v in kwargs.items()) - realresult = getattr(obj, methodname)(*args, **kwargs) - self.assertEqual( - result, - realresult - ) - # if the original is returned make sure that - # this doesn't happen with subclasses - if obj is realresult: - try: - class subtype(self.__class__.type2test): - pass - except TypeError: - pass # Skip this if we can't subclass - else: - obj = subtype(obj) - realresult = getattr(obj, methodname)(*args) - self.assertIsNot(obj, realresult) - - # check that obj.method(*args) raises exc - def checkraises(self, exc, obj, methodname, *args): - obj = self.fixtype(obj) - args = self.fixtype(args) - self.assertRaises( - exc, - getattr(obj, methodname), - *args - ) - - # call obj.method(*args) without any checks - def checkcall(self, obj, methodname, *args): - obj = self.fixtype(obj) - args = self.fixtype(args) - getattr(obj, methodname)(*args) - - def test_count(self): - self.checkequal(3, 'aaa', 'count', 'a') - self.checkequal(0, 'aaa', 'count', 'b') - self.checkequal(3, 'aaa', 'count', 'a') - self.checkequal(0, 'aaa', 'count', 'b') - self.checkequal(3, 'aaa', 'count', 'a') - self.checkequal(0, 'aaa', 'count', 'b') - self.checkequal(0, 'aaa', 'count', 'b') - self.checkequal(2, 'aaa', 'count', 'a', 1) - self.checkequal(0, 'aaa', 'count', 'a', 10) - 
self.checkequal(1, 'aaa', 'count', 'a', -1) - self.checkequal(3, 'aaa', 'count', 'a', -10) - self.checkequal(1, 'aaa', 'count', 'a', 0, 1) - self.checkequal(3, 'aaa', 'count', 'a', 0, 10) - self.checkequal(2, 'aaa', 'count', 'a', 0, -1) - self.checkequal(0, 'aaa', 'count', 'a', 0, -10) - self.checkequal(3, 'aaa', 'count', '', 1) - self.checkequal(1, 'aaa', 'count', '', 3) - self.checkequal(0, 'aaa', 'count', '', 10) - self.checkequal(2, 'aaa', 'count', '', -1) - self.checkequal(4, 'aaa', 'count', '', -10) - - self.checkequal(1, '', 'count', '') - self.checkequal(0, '', 'count', '', 1, 1) - self.checkequal(0, '', 'count', '', sys.maxsize, 0) - - self.checkequal(0, '', 'count', 'xx') - self.checkequal(0, '', 'count', 'xx', 1, 1) - self.checkequal(0, '', 'count', 'xx', sys.maxsize, 0) - - self.checkraises(TypeError, 'hello', 'count') - - if self.contains_bytes: - self.checkequal(0, 'hello', 'count', 42) - else: - self.checkraises(TypeError, 'hello', 'count', 42) - - # For a variety of combinations, - # verify that str.count() matches an equivalent function - # replacing all occurrences and then differencing the string lengths - charset = ['', 'a', 'b'] - digits = 7 - base = len(charset) - teststrings = set() - for i in range(base ** digits): - entry = [] - for j in range(digits): - i, m = divmod(i, base) - entry.append(charset[m]) - teststrings.add(''.join(entry)) - teststrings = [self.fixtype(ts) for ts in teststrings] - for i in teststrings: - n = len(i) - for j in teststrings: - r1 = i.count(j) - if j: - r2, rem = divmod(n - len(i.replace(j, self.fixtype(''))), - len(j)) - else: - r2, rem = len(i)+1, 0 - if rem or r1 != r2: - self.assertEqual(rem, 0, '%s != 0 for %s' % (rem, i)) - self.assertEqual(r1, r2, '%s != %s for %s' % (r1, r2, i)) - - def test_find(self): - self.checkequal(0, 'abcdefghiabc', 'find', 'abc') - self.checkequal(9, 'abcdefghiabc', 'find', 'abc', 1) - self.checkequal(-1, 'abcdefghiabc', 'find', 'def', 4) - - self.checkequal(0, 'abc', 'find', '', 
0) - self.checkequal(3, 'abc', 'find', '', 3) - self.checkequal(-1, 'abc', 'find', '', 4) - - # to check the ability to pass None as defaults - self.checkequal( 2, 'rrarrrrrrrrra', 'find', 'a') - self.checkequal(12, 'rrarrrrrrrrra', 'find', 'a', 4) - self.checkequal(-1, 'rrarrrrrrrrra', 'find', 'a', 4, 6) - self.checkequal(12, 'rrarrrrrrrrra', 'find', 'a', 4, None) - self.checkequal( 2, 'rrarrrrrrrrra', 'find', 'a', None, 6) - - self.checkraises(TypeError, 'hello', 'find') - - if self.contains_bytes: - self.checkequal(-1, 'hello', 'find', 42) - else: - self.checkraises(TypeError, 'hello', 'find', 42) - - self.checkequal(0, '', 'find', '') - self.checkequal(-1, '', 'find', '', 1, 1) - self.checkequal(-1, '', 'find', '', sys.maxsize, 0) - - self.checkequal(-1, '', 'find', 'xx') - self.checkequal(-1, '', 'find', 'xx', 1, 1) - self.checkequal(-1, '', 'find', 'xx', sys.maxsize, 0) - - # issue 7458 - self.checkequal(-1, 'ab', 'find', 'xxx', sys.maxsize + 1, 0) - - # For a variety of combinations, - # verify that str.find() matches __contains__ - # and that the found substring is really at that location - charset = ['', 'a', 'b', 'c'] - digits = 5 - base = len(charset) - teststrings = set() - for i in range(base ** digits): - entry = [] - for j in range(digits): - i, m = divmod(i, base) - entry.append(charset[m]) - teststrings.add(''.join(entry)) - teststrings = [self.fixtype(ts) for ts in teststrings] - for i in teststrings: - for j in teststrings: - loc = i.find(j) - r1 = (loc != -1) - r2 = j in i - self.assertEqual(r1, r2) - if loc != -1: - self.assertEqual(i[loc:loc+len(j)], j) - - def test_rfind(self): - self.checkequal(9, 'abcdefghiabc', 'rfind', 'abc') - self.checkequal(12, 'abcdefghiabc', 'rfind', '') - self.checkequal(0, 'abcdefghiabc', 'rfind', 'abcd') - self.checkequal(-1, 'abcdefghiabc', 'rfind', 'abcz') - - self.checkequal(3, 'abc', 'rfind', '', 0) - self.checkequal(3, 'abc', 'rfind', '', 3) - self.checkequal(-1, 'abc', 'rfind', '', 4) - - # to check the 
ability to pass None as defaults - self.checkequal(12, 'rrarrrrrrrrra', 'rfind', 'a') - self.checkequal(12, 'rrarrrrrrrrra', 'rfind', 'a', 4) - self.checkequal(-1, 'rrarrrrrrrrra', 'rfind', 'a', 4, 6) - self.checkequal(12, 'rrarrrrrrrrra', 'rfind', 'a', 4, None) - self.checkequal( 2, 'rrarrrrrrrrra', 'rfind', 'a', None, 6) - - self.checkraises(TypeError, 'hello', 'rfind') - - if self.contains_bytes: - self.checkequal(-1, 'hello', 'rfind', 42) - else: - self.checkraises(TypeError, 'hello', 'rfind', 42) - - # For a variety of combinations, - # verify that str.rfind() matches __contains__ - # and that the found substring is really at that location - charset = ['', 'a', 'b', 'c'] - digits = 5 - base = len(charset) - teststrings = set() - for i in range(base ** digits): - entry = [] - for j in range(digits): - i, m = divmod(i, base) - entry.append(charset[m]) - teststrings.add(''.join(entry)) - teststrings = [self.fixtype(ts) for ts in teststrings] - for i in teststrings: - for j in teststrings: - loc = i.rfind(j) - r1 = (loc != -1) - r2 = j in i - self.assertEqual(r1, r2) - if loc != -1: - self.assertEqual(i[loc:loc+len(j)], j) - - # issue 7458 - self.checkequal(-1, 'ab', 'rfind', 'xxx', sys.maxsize + 1, 0) - - # issue #15534 - self.checkequal(0, '<......\u043c...', "rfind", "<") - - def test_index(self): - self.checkequal(0, 'abcdefghiabc', 'index', '') - self.checkequal(3, 'abcdefghiabc', 'index', 'def') - self.checkequal(0, 'abcdefghiabc', 'index', 'abc') - self.checkequal(9, 'abcdefghiabc', 'index', 'abc', 1) - - self.checkraises(ValueError, 'abcdefghiabc', 'index', 'hib') - self.checkraises(ValueError, 'abcdefghiab', 'index', 'abc', 1) - self.checkraises(ValueError, 'abcdefghi', 'index', 'ghi', 8) - self.checkraises(ValueError, 'abcdefghi', 'index', 'ghi', -1) - - # to check the ability to pass None as defaults - self.checkequal( 2, 'rrarrrrrrrrra', 'index', 'a') - self.checkequal(12, 'rrarrrrrrrrra', 'index', 'a', 4) - self.checkraises(ValueError, 
'rrarrrrrrrrra', 'index', 'a', 4, 6) - self.checkequal(12, 'rrarrrrrrrrra', 'index', 'a', 4, None) - self.checkequal( 2, 'rrarrrrrrrrra', 'index', 'a', None, 6) - - self.checkraises(TypeError, 'hello', 'index') - - if self.contains_bytes: - self.checkraises(ValueError, 'hello', 'index', 42) - else: - self.checkraises(TypeError, 'hello', 'index', 42) - - def test_rindex(self): - self.checkequal(12, 'abcdefghiabc', 'rindex', '') - self.checkequal(3, 'abcdefghiabc', 'rindex', 'def') - self.checkequal(9, 'abcdefghiabc', 'rindex', 'abc') - self.checkequal(0, 'abcdefghiabc', 'rindex', 'abc', 0, -1) - - self.checkraises(ValueError, 'abcdefghiabc', 'rindex', 'hib') - self.checkraises(ValueError, 'defghiabc', 'rindex', 'def', 1) - self.checkraises(ValueError, 'defghiabc', 'rindex', 'abc', 0, -1) - self.checkraises(ValueError, 'abcdefghi', 'rindex', 'ghi', 0, 8) - self.checkraises(ValueError, 'abcdefghi', 'rindex', 'ghi', 0, -1) - - # to check the ability to pass None as defaults - self.checkequal(12, 'rrarrrrrrrrra', 'rindex', 'a') - self.checkequal(12, 'rrarrrrrrrrra', 'rindex', 'a', 4) - self.checkraises(ValueError, 'rrarrrrrrrrra', 'rindex', 'a', 4, 6) - self.checkequal(12, 'rrarrrrrrrrra', 'rindex', 'a', 4, None) - self.checkequal( 2, 'rrarrrrrrrrra', 'rindex', 'a', None, 6) - - self.checkraises(TypeError, 'hello', 'rindex') - - if self.contains_bytes: - self.checkraises(ValueError, 'hello', 'rindex', 42) - else: - self.checkraises(TypeError, 'hello', 'rindex', 42) - - def test_lower(self): - self.checkequal('hello', 'HeLLo', 'lower') - self.checkequal('hello', 'hello', 'lower') - self.checkraises(TypeError, 'hello', 'lower', 42) - - def test_upper(self): - self.checkequal('HELLO', 'HeLLo', 'upper') - self.checkequal('HELLO', 'HELLO', 'upper') - self.checkraises(TypeError, 'hello', 'upper', 42) - - def test_expandtabs(self): - self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs') - self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 
'expandtabs', 8) - self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs', 4) - self.checkequal('abc\r\nab def\ng hi', 'abc\r\nab\tdef\ng\thi', 'expandtabs', 4) - self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs') - self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs', 8) - self.checkequal('abc\r\nab\r\ndef\ng\r\nhi', 'abc\r\nab\r\ndef\ng\r\nhi', 'expandtabs', 4) - self.checkequal(' a\n b', ' \ta\n\tb', 'expandtabs', 1) - - self.checkraises(TypeError, 'hello', 'expandtabs', 42, 42) - # This test is only valid when sizeof(int) == sizeof(void*) == 4. - if sys.maxsize < (1 << 32) and struct.calcsize('P') == 4: - self.checkraises(OverflowError, - '\ta\n\tb', 'expandtabs', sys.maxsize) - - def test_split(self): - # by a char - self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|') - self.checkequal(['a|b|c|d'], 'a|b|c|d', 'split', '|', 0) - self.checkequal(['a', 'b|c|d'], 'a|b|c|d', 'split', '|', 1) - self.checkequal(['a', 'b', 'c|d'], 'a|b|c|d', 'split', '|', 2) - self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|', 3) - self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|', 4) - self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|', - sys.maxsize-2) - self.checkequal(['a|b|c|d'], 'a|b|c|d', 'split', '|', 0) - self.checkequal(['a', '', 'b||c||d'], 'a||b||c||d', 'split', '|', 2) - self.checkequal(['endcase ', ''], 'endcase |', 'split', '|') - self.checkequal(['', ' startcase'], '| startcase', 'split', '|') - self.checkequal(['', 'bothcase', ''], '|bothcase|', 'split', '|') - self.checkequal(['a', '', 'b\x00c\x00d'], 'a\x00\x00b\x00c\x00d', 'split', '\x00', 2) - - self.checkequal(['a']*20, ('a|'*20)[:-1], 'split', '|') - self.checkequal(['a']*15 +['a|a|a|a|a'], - ('a|'*20)[:-1], 'split', '|', 15) - - # by string - self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//') - self.checkequal(['a', 'b//c//d'], 'a//b//c//d', 'split', '//', 1) - self.checkequal(['a', 'b', 
'c//d'], 'a//b//c//d', 'split', '//', 2) - self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//', 3) - self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//', 4) - self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//', - sys.maxsize-10) - self.checkequal(['a//b//c//d'], 'a//b//c//d', 'split', '//', 0) - self.checkequal(['a', '', 'b////c////d'], 'a////b////c////d', 'split', '//', 2) - self.checkequal(['endcase ', ''], 'endcase test', 'split', 'test') - self.checkequal(['', ' begincase'], 'test begincase', 'split', 'test') - self.checkequal(['', ' bothcase ', ''], 'test bothcase test', - 'split', 'test') - self.checkequal(['a', 'bc'], 'abbbc', 'split', 'bb') - self.checkequal(['', ''], 'aaa', 'split', 'aaa') - self.checkequal(['aaa'], 'aaa', 'split', 'aaa', 0) - self.checkequal(['ab', 'ab'], 'abbaab', 'split', 'ba') - self.checkequal(['aaaa'], 'aaaa', 'split', 'aab') - self.checkequal([''], '', 'split', 'aaa') - self.checkequal(['aa'], 'aa', 'split', 'aaa') - self.checkequal(['A', 'bobb'], 'Abbobbbobb', 'split', 'bbobb') - self.checkequal(['A', 'B', ''], 'AbbobbBbbobb', 'split', 'bbobb') - - self.checkequal(['a']*20, ('aBLAH'*20)[:-4], 'split', 'BLAH') - self.checkequal(['a']*20, ('aBLAH'*20)[:-4], 'split', 'BLAH', 19) - self.checkequal(['a']*18 + ['aBLAHa'], ('aBLAH'*20)[:-4], - 'split', 'BLAH', 18) - - # with keyword args - self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', sep='|') - self.checkequal(['a', 'b|c|d'], - 'a|b|c|d', 'split', '|', maxsplit=1) - self.checkequal(['a', 'b|c|d'], - 'a|b|c|d', 'split', sep='|', maxsplit=1) - self.checkequal(['a', 'b|c|d'], - 'a|b|c|d', 'split', maxsplit=1, sep='|') - self.checkequal(['a', 'b c d'], - 'a b c d', 'split', maxsplit=1) - - # argument type - self.checkraises(TypeError, 'hello', 'split', 42, 42, 42) - - # null case - self.checkraises(ValueError, 'hello', 'split', '') - self.checkraises(ValueError, 'hello', 'split', '', 0) - - def test_rsplit(self): - # by a char - 
self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|') - self.checkequal(['a|b|c', 'd'], 'a|b|c|d', 'rsplit', '|', 1) - self.checkequal(['a|b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 2) - self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 3) - self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 4) - self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', - sys.maxsize-100) - self.checkequal(['a|b|c|d'], 'a|b|c|d', 'rsplit', '|', 0) - self.checkequal(['a||b||c', '', 'd'], 'a||b||c||d', 'rsplit', '|', 2) - self.checkequal(['', ' begincase'], '| begincase', 'rsplit', '|') - self.checkequal(['endcase ', ''], 'endcase |', 'rsplit', '|') - self.checkequal(['', 'bothcase', ''], '|bothcase|', 'rsplit', '|') - - self.checkequal(['a\x00\x00b', 'c', 'd'], 'a\x00\x00b\x00c\x00d', 'rsplit', '\x00', 2) - - self.checkequal(['a']*20, ('a|'*20)[:-1], 'rsplit', '|') - self.checkequal(['a|a|a|a|a']+['a']*15, - ('a|'*20)[:-1], 'rsplit', '|', 15) - - # by string - self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//') - self.checkequal(['a//b//c', 'd'], 'a//b//c//d', 'rsplit', '//', 1) - self.checkequal(['a//b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 2) - self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 3) - self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 4) - self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', - sys.maxsize-5) - self.checkequal(['a//b//c//d'], 'a//b//c//d', 'rsplit', '//', 0) - self.checkequal(['a////b////c', '', 'd'], 'a////b////c////d', 'rsplit', '//', 2) - self.checkequal(['', ' begincase'], 'test begincase', 'rsplit', 'test') - self.checkequal(['endcase ', ''], 'endcase test', 'rsplit', 'test') - self.checkequal(['', ' bothcase ', ''], 'test bothcase test', - 'rsplit', 'test') - self.checkequal(['ab', 'c'], 'abbbc', 'rsplit', 'bb') - self.checkequal(['', ''], 'aaa', 'rsplit', 'aaa') - self.checkequal(['aaa'], 'aaa', 'rsplit', 'aaa', 0) - 
self.checkequal(['ab', 'ab'], 'abbaab', 'rsplit', 'ba') - self.checkequal(['aaaa'], 'aaaa', 'rsplit', 'aab') - self.checkequal([''], '', 'rsplit', 'aaa') - self.checkequal(['aa'], 'aa', 'rsplit', 'aaa') - self.checkequal(['bbob', 'A'], 'bbobbbobbA', 'rsplit', 'bbobb') - self.checkequal(['', 'B', 'A'], 'bbobbBbbobbA', 'rsplit', 'bbobb') - - self.checkequal(['a']*20, ('aBLAH'*20)[:-4], 'rsplit', 'BLAH') - self.checkequal(['a']*20, ('aBLAH'*20)[:-4], 'rsplit', 'BLAH', 19) - self.checkequal(['aBLAHa'] + ['a']*18, ('aBLAH'*20)[:-4], - 'rsplit', 'BLAH', 18) - - # with keyword args - self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', sep='|') - self.checkequal(['a|b|c', 'd'], - 'a|b|c|d', 'rsplit', '|', maxsplit=1) - self.checkequal(['a|b|c', 'd'], - 'a|b|c|d', 'rsplit', sep='|', maxsplit=1) - self.checkequal(['a|b|c', 'd'], - 'a|b|c|d', 'rsplit', maxsplit=1, sep='|') - self.checkequal(['a b c', 'd'], - 'a b c d', 'rsplit', maxsplit=1) - - # argument type - self.checkraises(TypeError, 'hello', 'rsplit', 42, 42, 42) - - # null case - self.checkraises(ValueError, 'hello', 'rsplit', '') - self.checkraises(ValueError, 'hello', 'rsplit', '', 0) - - def test_replace(self): - EQ = self.checkequal - - # Operations on the empty string - EQ("", "", "replace", "", "") - EQ("A", "", "replace", "", "A") - EQ("", "", "replace", "A", "") - EQ("", "", "replace", "A", "A") - EQ("", "", "replace", "", "", 100) - EQ("", "", "replace", "", "", sys.maxsize) - - # interleave (from=="", 'to' gets inserted everywhere) - EQ("A", "A", "replace", "", "") - EQ("*A*", "A", "replace", "", "*") - EQ("*1A*1", "A", "replace", "", "*1") - EQ("*-#A*-#", "A", "replace", "", "*-#") - EQ("*-A*-A*-", "AA", "replace", "", "*-") - EQ("*-A*-A*-", "AA", "replace", "", "*-", -1) - EQ("*-A*-A*-", "AA", "replace", "", "*-", sys.maxsize) - EQ("*-A*-A*-", "AA", "replace", "", "*-", 4) - EQ("*-A*-A*-", "AA", "replace", "", "*-", 3) - EQ("*-A*-A", "AA", "replace", "", "*-", 2) - EQ("*-AA", "AA", "replace", "", 
"*-", 1) - EQ("AA", "AA", "replace", "", "*-", 0) - - # single character deletion (from=="A", to=="") - EQ("", "A", "replace", "A", "") - EQ("", "AAA", "replace", "A", "") - EQ("", "AAA", "replace", "A", "", -1) - EQ("", "AAA", "replace", "A", "", sys.maxsize) - EQ("", "AAA", "replace", "A", "", 4) - EQ("", "AAA", "replace", "A", "", 3) - EQ("A", "AAA", "replace", "A", "", 2) - EQ("AA", "AAA", "replace", "A", "", 1) - EQ("AAA", "AAA", "replace", "A", "", 0) - EQ("", "AAAAAAAAAA", "replace", "A", "") - EQ("BCD", "ABACADA", "replace", "A", "") - EQ("BCD", "ABACADA", "replace", "A", "", -1) - EQ("BCD", "ABACADA", "replace", "A", "", sys.maxsize) - EQ("BCD", "ABACADA", "replace", "A", "", 5) - EQ("BCD", "ABACADA", "replace", "A", "", 4) - EQ("BCDA", "ABACADA", "replace", "A", "", 3) - EQ("BCADA", "ABACADA", "replace", "A", "", 2) - EQ("BACADA", "ABACADA", "replace", "A", "", 1) - EQ("ABACADA", "ABACADA", "replace", "A", "", 0) - EQ("BCD", "ABCAD", "replace", "A", "") - EQ("BCD", "ABCADAA", "replace", "A", "") - EQ("BCD", "BCD", "replace", "A", "") - EQ("*************", "*************", "replace", "A", "") - EQ("^A^", "^"+"A"*1000+"^", "replace", "A", "", 999) - - # substring deletion (from=="the", to=="") - EQ("", "the", "replace", "the", "") - EQ("ater", "theater", "replace", "the", "") - EQ("", "thethe", "replace", "the", "") - EQ("", "thethethethe", "replace", "the", "") - EQ("aaaa", "theatheatheathea", "replace", "the", "") - EQ("that", "that", "replace", "the", "") - EQ("thaet", "thaet", "replace", "the", "") - EQ("here and re", "here and there", "replace", "the", "") - EQ("here and re and re", "here and there and there", - "replace", "the", "", sys.maxsize) - EQ("here and re and re", "here and there and there", - "replace", "the", "", -1) - EQ("here and re and re", "here and there and there", - "replace", "the", "", 3) - EQ("here and re and re", "here and there and there", - "replace", "the", "", 2) - EQ("here and re and there", "here and there and there", - 
"replace", "the", "", 1) - EQ("here and there and there", "here and there and there", - "replace", "the", "", 0) - EQ("here and re and re", "here and there and there", "replace", "the", "") - - EQ("abc", "abc", "replace", "the", "") - EQ("abcdefg", "abcdefg", "replace", "the", "") - - # substring deletion (from=="bob", to=="") - EQ("bob", "bbobob", "replace", "bob", "") - EQ("bobXbob", "bbobobXbbobob", "replace", "bob", "") - EQ("aaaaaaa", "aaaaaaabob", "replace", "bob", "") - EQ("aaaaaaa", "aaaaaaa", "replace", "bob", "") - - # single character replace in place (len(from)==len(to)==1) - EQ("Who goes there?", "Who goes there?", "replace", "o", "o") - EQ("WhO gOes there?", "Who goes there?", "replace", "o", "O") - EQ("WhO gOes there?", "Who goes there?", "replace", "o", "O", sys.maxsize) - EQ("WhO gOes there?", "Who goes there?", "replace", "o", "O", -1) - EQ("WhO gOes there?", "Who goes there?", "replace", "o", "O", 3) - EQ("WhO gOes there?", "Who goes there?", "replace", "o", "O", 2) - EQ("WhO goes there?", "Who goes there?", "replace", "o", "O", 1) - EQ("Who goes there?", "Who goes there?", "replace", "o", "O", 0) - - EQ("Who goes there?", "Who goes there?", "replace", "a", "q") - EQ("who goes there?", "Who goes there?", "replace", "W", "w") - EQ("wwho goes there?ww", "WWho goes there?WW", "replace", "W", "w") - EQ("Who goes there!", "Who goes there?", "replace", "?", "!") - EQ("Who goes there!!", "Who goes there??", "replace", "?", "!") - - EQ("Who goes there?", "Who goes there?", "replace", ".", "!") - - # substring replace in place (len(from)==len(to) > 1) - EQ("Th** ** a t**sue", "This is a tissue", "replace", "is", "**") - EQ("Th** ** a t**sue", "This is a tissue", "replace", "is", "**", sys.maxsize) - EQ("Th** ** a t**sue", "This is a tissue", "replace", "is", "**", -1) - EQ("Th** ** a t**sue", "This is a tissue", "replace", "is", "**", 4) - EQ("Th** ** a t**sue", "This is a tissue", "replace", "is", "**", 3) - EQ("Th** ** a tissue", "This is a tissue", 
"replace", "is", "**", 2) - EQ("Th** is a tissue", "This is a tissue", "replace", "is", "**", 1) - EQ("This is a tissue", "This is a tissue", "replace", "is", "**", 0) - EQ("cobob", "bobob", "replace", "bob", "cob") - EQ("cobobXcobocob", "bobobXbobobob", "replace", "bob", "cob") - EQ("bobob", "bobob", "replace", "bot", "bot") - - # replace single character (len(from)==1, len(to)>1) - EQ("ReyKKjaviKK", "Reykjavik", "replace", "k", "KK") - EQ("ReyKKjaviKK", "Reykjavik", "replace", "k", "KK", -1) - EQ("ReyKKjaviKK", "Reykjavik", "replace", "k", "KK", sys.maxsize) - EQ("ReyKKjaviKK", "Reykjavik", "replace", "k", "KK", 2) - EQ("ReyKKjavik", "Reykjavik", "replace", "k", "KK", 1) - EQ("Reykjavik", "Reykjavik", "replace", "k", "KK", 0) - EQ("A----B----C----", "A.B.C.", "replace", ".", "----") - # issue #15534 - EQ('...\u043c......<', '...\u043c......<', "replace", "<", "<") - - EQ("Reykjavik", "Reykjavik", "replace", "q", "KK") - - # replace substring (len(from)>1, len(to)!=len(from)) - EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam", - "replace", "spam", "ham") - EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam", - "replace", "spam", "ham", sys.maxsize) - EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam", - "replace", "spam", "ham", -1) - EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam", - "replace", "spam", "ham", 4) - EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam", - "replace", "spam", "ham", 3) - EQ("ham, ham, eggs and spam", "spam, spam, eggs and spam", - "replace", "spam", "ham", 2) - EQ("ham, spam, eggs and spam", "spam, spam, eggs and spam", - "replace", "spam", "ham", 1) - EQ("spam, spam, eggs and spam", "spam, spam, eggs and spam", - "replace", "spam", "ham", 0) - - EQ("bobob", "bobobob", "replace", "bobob", "bob") - EQ("bobobXbobob", "bobobobXbobobob", "replace", "bobob", "bob") - EQ("BOBOBOB", "BOBOBOB", "replace", "bob", "bobby") - - # XXX Commented out. 
Is there any reason to support buffer objects - # as arguments for str.replace()? GvR -## ba = bytearray('a') -## bb = bytearray('b') -## EQ("bbc", "abc", "replace", ba, bb) -## EQ("aac", "abc", "replace", bb, ba) - - # - self.checkequal('one@two!three!', 'one!two!three!', 'replace', '!', '@', 1) - self.checkequal('onetwothree', 'one!two!three!', 'replace', '!', '') - self.checkequal('one@two@three!', 'one!two!three!', 'replace', '!', '@', 2) - self.checkequal('one@two@three@', 'one!two!three!', 'replace', '!', '@', 3) - self.checkequal('one@two@three@', 'one!two!three!', 'replace', '!', '@', 4) - self.checkequal('one!two!three!', 'one!two!three!', 'replace', '!', '@', 0) - self.checkequal('one@two@three@', 'one!two!three!', 'replace', '!', '@') - self.checkequal('one!two!three!', 'one!two!three!', 'replace', 'x', '@') - self.checkequal('one!two!three!', 'one!two!three!', 'replace', 'x', '@', 2) - self.checkequal('-a-b-c-', 'abc', 'replace', '', '-') - self.checkequal('-a-b-c', 'abc', 'replace', '', '-', 3) - self.checkequal('abc', 'abc', 'replace', '', '-', 0) - self.checkequal('', '', 'replace', '', '') - self.checkequal('abc', 'abc', 'replace', 'ab', '--', 0) - self.checkequal('abc', 'abc', 'replace', 'xy', '--') - # Next three for SF bug 422088: [OSF1 alpha] string.replace(); died with - # MemoryError due to empty result (platform malloc issue when requesting - # 0 bytes). 
- self.checkequal('', '123', 'replace', '123', '') - self.checkequal('', '123123', 'replace', '123', '') - self.checkequal('x', '123x123', 'replace', '123', '') - - self.checkraises(TypeError, 'hello', 'replace') - self.checkraises(TypeError, 'hello', 'replace', 42) - self.checkraises(TypeError, 'hello', 'replace', 42, 'h') - self.checkraises(TypeError, 'hello', 'replace', 'h', 42) - - def test_replace_overflow(self): - # Check for overflow checking on 32 bit machines - if sys.maxsize != 2147483647 or struct.calcsize("P") > 4: - return - A2_16 = "A" * (2**16) - self.checkraises(OverflowError, A2_16, "replace", "", A2_16) - self.checkraises(OverflowError, A2_16, "replace", "A", A2_16) - self.checkraises(OverflowError, A2_16, "replace", "AA", A2_16+A2_16) - - - -class CommonTest(BaseTest): - # This testcase contains test that can be used in all - # stringlike classes. Currently this is str, unicode - # UserString and the string module. - - def test_hash(self): - # SF bug 1054139: += optimization was not invalidating cached hash value - a = self.type2test('DNSSEC') - b = self.type2test('') - for c in a: - b += c - hash(b) - self.assertEqual(hash(a), hash(b)) - - def test_capitalize(self): - self.checkequal(' hello ', ' hello ', 'capitalize') - self.checkequal('Hello ', 'Hello ','capitalize') - self.checkequal('Hello ', 'hello ','capitalize') - self.checkequal('Aaaa', 'aaaa', 'capitalize') - self.checkequal('Aaaa', 'AaAa', 'capitalize') - - # check that titlecased chars are lowered correctly - # \u1ffc is the titlecased char - self.checkequal('\u03a9\u0399\u1ff3\u1ff3\u1ff3', - '\u1ff3\u1ff3\u1ffc\u1ffc', 'capitalize') - # check with cased non-letter chars - self.checkequal('\u24c5\u24e8\u24e3\u24d7\u24de\u24dd', - '\u24c5\u24ce\u24c9\u24bd\u24c4\u24c3', 'capitalize') - self.checkequal('\u24c5\u24e8\u24e3\u24d7\u24de\u24dd', - '\u24df\u24e8\u24e3\u24d7\u24de\u24dd', 'capitalize') - self.checkequal('\u2160\u2171\u2172', - '\u2160\u2161\u2162', 'capitalize') - 
self.checkequal('\u2160\u2171\u2172', - '\u2170\u2171\u2172', 'capitalize') - # check with Ll chars with no upper - nothing changes here - self.checkequal('\u019b\u1d00\u1d86\u0221\u1fb7', - '\u019b\u1d00\u1d86\u0221\u1fb7', 'capitalize') - - self.checkraises(TypeError, 'hello', 'capitalize', 42) - - def test_lower(self): - self.checkequal('hello', 'HeLLo', 'lower') - self.checkequal('hello', 'hello', 'lower') - self.checkraises(TypeError, 'hello', 'lower', 42) - - def test_upper(self): - self.checkequal('HELLO', 'HeLLo', 'upper') - self.checkequal('HELLO', 'HELLO', 'upper') - self.checkraises(TypeError, 'hello', 'upper', 42) - - def test_expandtabs(self): - self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs') - self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs', 8) - self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs', 4) - self.checkequal('abc\r\nab def\ng hi', 'abc\r\nab\tdef\ng\thi', 'expandtabs', 4) - self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs') - self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs', 8) - self.checkequal('abc\r\nab\r\ndef\ng\r\nhi', 'abc\r\nab\r\ndef\ng\r\nhi', 'expandtabs', 4) - - self.checkraises(TypeError, 'hello', 'expandtabs', 42, 42) - - def test_additional_split(self): - self.checkequal(['this', 'is', 'the', 'split', 'function'], - 'this is the split function', 'split') - - # by whitespace - self.checkequal(['a', 'b', 'c', 'd'], 'a b c d ', 'split') - self.checkequal(['a', 'b c d'], 'a b c d', 'split', None, 1) - self.checkequal(['a', 'b', 'c d'], 'a b c d', 'split', None, 2) - self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'split', None, 3) - self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'split', None, 4) - self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'split', None, - sys.maxsize-1) - self.checkequal(['a b c d'], 'a b c d', 'split', None, 0) - self.checkequal(['a b c d'], ' a b c d', 'split', None, 0) - 
self.checkequal(['a', 'b', 'c d'], 'a b c d', 'split', None, 2) - - self.checkequal([], ' ', 'split') - self.checkequal(['a'], ' a ', 'split') - self.checkequal(['a', 'b'], ' a b ', 'split') - self.checkequal(['a', 'b '], ' a b ', 'split', None, 1) - self.checkequal(['a', 'b c '], ' a b c ', 'split', None, 1) - self.checkequal(['a', 'b', 'c '], ' a b c ', 'split', None, 2) - self.checkequal(['a', 'b'], '\n\ta \t\r b \v ', 'split') - aaa = ' a '*20 - self.checkequal(['a']*20, aaa, 'split') - self.checkequal(['a'] + [aaa[4:]], aaa, 'split', None, 1) - self.checkequal(['a']*19 + ['a '], aaa, 'split', None, 19) - - # mixed use of str and unicode - self.checkequal(['a', 'b', 'c d'], 'a b c d', 'split', ' ', 2) - - def test_additional_rsplit(self): - self.checkequal(['this', 'is', 'the', 'rsplit', 'function'], - 'this is the rsplit function', 'rsplit') - - # by whitespace - self.checkequal(['a', 'b', 'c', 'd'], 'a b c d ', 'rsplit') - self.checkequal(['a b c', 'd'], 'a b c d', 'rsplit', None, 1) - self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', None, 2) - self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None, 3) - self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None, 4) - self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None, - sys.maxsize-20) - self.checkequal(['a b c d'], 'a b c d', 'rsplit', None, 0) - self.checkequal(['a b c d'], 'a b c d ', 'rsplit', None, 0) - self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', None, 2) - - self.checkequal([], ' ', 'rsplit') - self.checkequal(['a'], ' a ', 'rsplit') - self.checkequal(['a', 'b'], ' a b ', 'rsplit') - self.checkequal([' a', 'b'], ' a b ', 'rsplit', None, 1) - self.checkequal([' a b','c'], ' a b c ', 'rsplit', - None, 1) - self.checkequal([' a', 'b', 'c'], ' a b c ', 'rsplit', - None, 2) - self.checkequal(['a', 'b'], '\n\ta \t\r b \v ', 'rsplit', None, 88) - aaa = ' a '*20 - self.checkequal(['a']*20, aaa, 'rsplit') - self.checkequal([aaa[:-4]] + ['a'], aaa, 'rsplit', None, 1) 
- self.checkequal([' a a'] + ['a']*18, aaa, 'rsplit', None, 18) - - # mixed use of str and unicode - self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', ' ', 2) - - def test_strip(self): - self.checkequal('hello', ' hello ', 'strip') - self.checkequal('hello ', ' hello ', 'lstrip') - self.checkequal(' hello', ' hello ', 'rstrip') - self.checkequal('hello', 'hello', 'strip') - - # strip/lstrip/rstrip with None arg - self.checkequal('hello', ' hello ', 'strip', None) - self.checkequal('hello ', ' hello ', 'lstrip', None) - self.checkequal(' hello', ' hello ', 'rstrip', None) - self.checkequal('hello', 'hello', 'strip', None) - - # strip/lstrip/rstrip with str arg - self.checkequal('hello', 'xyzzyhelloxyzzy', 'strip', 'xyz') - self.checkequal('helloxyzzy', 'xyzzyhelloxyzzy', 'lstrip', 'xyz') - self.checkequal('xyzzyhello', 'xyzzyhelloxyzzy', 'rstrip', 'xyz') - self.checkequal('hello', 'hello', 'strip', 'xyz') - - self.checkraises(TypeError, 'hello', 'strip', 42, 42) - self.checkraises(TypeError, 'hello', 'lstrip', 42, 42) - self.checkraises(TypeError, 'hello', 'rstrip', 42, 42) - - def test_ljust(self): - self.checkequal('abc ', 'abc', 'ljust', 10) - self.checkequal('abc ', 'abc', 'ljust', 6) - self.checkequal('abc', 'abc', 'ljust', 3) - self.checkequal('abc', 'abc', 'ljust', 2) - self.checkequal('abc*******', 'abc', 'ljust', 10, '*') - self.checkraises(TypeError, 'abc', 'ljust') - - def test_rjust(self): - self.checkequal(' abc', 'abc', 'rjust', 10) - self.checkequal(' abc', 'abc', 'rjust', 6) - self.checkequal('abc', 'abc', 'rjust', 3) - self.checkequal('abc', 'abc', 'rjust', 2) - self.checkequal('*******abc', 'abc', 'rjust', 10, '*') - self.checkraises(TypeError, 'abc', 'rjust') - - def test_center(self): - self.checkequal(' abc ', 'abc', 'center', 10) - self.checkequal(' abc ', 'abc', 'center', 6) - self.checkequal('abc', 'abc', 'center', 3) - self.checkequal('abc', 'abc', 'center', 2) - self.checkequal('***abc****', 'abc', 'center', 10, '*') - 
self.checkraises(TypeError, 'abc', 'center') - - def test_swapcase(self): - self.checkequal('hEllO CoMPuTErS', 'HeLLo cOmpUteRs', 'swapcase') - - self.checkraises(TypeError, 'hello', 'swapcase', 42) - - def test_zfill(self): - self.checkequal('123', '123', 'zfill', 2) - self.checkequal('123', '123', 'zfill', 3) - self.checkequal('0123', '123', 'zfill', 4) - self.checkequal('+123', '+123', 'zfill', 3) - self.checkequal('+123', '+123', 'zfill', 4) - self.checkequal('+0123', '+123', 'zfill', 5) - self.checkequal('-123', '-123', 'zfill', 3) - self.checkequal('-123', '-123', 'zfill', 4) - self.checkequal('-0123', '-123', 'zfill', 5) - self.checkequal('000', '', 'zfill', 3) - self.checkequal('34', '34', 'zfill', 1) - self.checkequal('0034', '34', 'zfill', 4) - - self.checkraises(TypeError, '123', 'zfill') - -class MixinStrUnicodeUserStringTest(object): - # additional tests that only work for - # stringlike objects, i.e. str, unicode, UserString - # (but not the string module) - - def test_islower(self): - self.checkequal(False, '', 'islower') - self.checkequal(True, 'a', 'islower') - self.checkequal(False, 'A', 'islower') - self.checkequal(False, '\n', 'islower') - self.checkequal(True, 'abc', 'islower') - self.checkequal(False, 'aBc', 'islower') - self.checkequal(True, 'abc\n', 'islower') - self.checkraises(TypeError, 'abc', 'islower', 42) - - def test_isupper(self): - self.checkequal(False, '', 'isupper') - self.checkequal(False, 'a', 'isupper') - self.checkequal(True, 'A', 'isupper') - self.checkequal(False, '\n', 'isupper') - self.checkequal(True, 'ABC', 'isupper') - self.checkequal(False, 'AbC', 'isupper') - self.checkequal(True, 'ABC\n', 'isupper') - self.checkraises(TypeError, 'abc', 'isupper', 42) - - def test_istitle(self): - self.checkequal(False, '', 'istitle') - self.checkequal(False, 'a', 'istitle') - self.checkequal(True, 'A', 'istitle') - self.checkequal(False, '\n', 'istitle') - self.checkequal(True, 'A Titlecased Line', 'istitle') - self.checkequal(True, 
'A\nTitlecased Line', 'istitle') - self.checkequal(True, 'A Titlecased, Line', 'istitle') - self.checkequal(False, 'Not a capitalized String', 'istitle') - self.checkequal(False, 'Not\ta Titlecase String', 'istitle') - self.checkequal(False, 'Not--a Titlecase String', 'istitle') - self.checkequal(False, 'NOT', 'istitle') - self.checkraises(TypeError, 'abc', 'istitle', 42) - - def test_isspace(self): - self.checkequal(False, '', 'isspace') - self.checkequal(False, 'a', 'isspace') - self.checkequal(True, ' ', 'isspace') - self.checkequal(True, '\t', 'isspace') - self.checkequal(True, '\r', 'isspace') - self.checkequal(True, '\n', 'isspace') - self.checkequal(True, ' \t\r\n', 'isspace') - self.checkequal(False, ' \t\r\na', 'isspace') - self.checkraises(TypeError, 'abc', 'isspace', 42) - - def test_isalpha(self): - self.checkequal(False, '', 'isalpha') - self.checkequal(True, 'a', 'isalpha') - self.checkequal(True, 'A', 'isalpha') - self.checkequal(False, '\n', 'isalpha') - self.checkequal(True, 'abc', 'isalpha') - self.checkequal(False, 'aBc123', 'isalpha') - self.checkequal(False, 'abc\n', 'isalpha') - self.checkraises(TypeError, 'abc', 'isalpha', 42) - - def test_isalnum(self): - self.checkequal(False, '', 'isalnum') - self.checkequal(True, 'a', 'isalnum') - self.checkequal(True, 'A', 'isalnum') - self.checkequal(False, '\n', 'isalnum') - self.checkequal(True, '123abc456', 'isalnum') - self.checkequal(True, 'a1b3c', 'isalnum') - self.checkequal(False, 'aBc000 ', 'isalnum') - self.checkequal(False, 'abc\n', 'isalnum') - self.checkraises(TypeError, 'abc', 'isalnum', 42) - - def test_isdigit(self): - self.checkequal(False, '', 'isdigit') - self.checkequal(False, 'a', 'isdigit') - self.checkequal(True, '0', 'isdigit') - self.checkequal(True, '0123456789', 'isdigit') - self.checkequal(False, '0123456789a', 'isdigit') - - self.checkraises(TypeError, 'abc', 'isdigit', 42) - - def test_title(self): - self.checkequal(' Hello ', ' hello ', 'title') - self.checkequal('Hello ', 
'hello ', 'title') - self.checkequal('Hello ', 'Hello ', 'title') - self.checkequal('Format This As Title String', "fOrMaT thIs aS titLe String", 'title') - self.checkequal('Format,This-As*Title;String', "fOrMaT,thIs-aS*titLe;String", 'title', ) - self.checkequal('Getint', "getInt", 'title') - self.checkraises(TypeError, 'hello', 'title', 42) - - def test_splitlines(self): - self.checkequal(['abc', 'def', '', 'ghi'], "abc\ndef\n\rghi", 'splitlines') - self.checkequal(['abc', 'def', '', 'ghi'], "abc\ndef\n\r\nghi", 'splitlines') - self.checkequal(['abc', 'def', 'ghi'], "abc\ndef\r\nghi", 'splitlines') - self.checkequal(['abc', 'def', 'ghi'], "abc\ndef\r\nghi\n", 'splitlines') - self.checkequal(['abc', 'def', 'ghi', ''], "abc\ndef\r\nghi\n\r", 'splitlines') - self.checkequal(['', 'abc', 'def', 'ghi', ''], "\nabc\ndef\r\nghi\n\r", 'splitlines') - self.checkequal(['', 'abc', 'def', 'ghi', ''], - "\nabc\ndef\r\nghi\n\r", 'splitlines', False) - self.checkequal(['\n', 'abc\n', 'def\r\n', 'ghi\n', '\r'], - "\nabc\ndef\r\nghi\n\r", 'splitlines', True) - self.checkequal(['', 'abc', 'def', 'ghi', ''], "\nabc\ndef\r\nghi\n\r", - 'splitlines', keepends=False) - self.checkequal(['\n', 'abc\n', 'def\r\n', 'ghi\n', '\r'], - "\nabc\ndef\r\nghi\n\r", 'splitlines', keepends=True) - - self.checkraises(TypeError, 'abc', 'splitlines', 42, 42) - - def test_startswith(self): - self.checkequal(True, 'hello', 'startswith', 'he') - self.checkequal(True, 'hello', 'startswith', 'hello') - self.checkequal(False, 'hello', 'startswith', 'hello world') - self.checkequal(True, 'hello', 'startswith', '') - self.checkequal(False, 'hello', 'startswith', 'ello') - self.checkequal(True, 'hello', 'startswith', 'ello', 1) - self.checkequal(True, 'hello', 'startswith', 'o', 4) - self.checkequal(False, 'hello', 'startswith', 'o', 5) - self.checkequal(True, 'hello', 'startswith', '', 5) - self.checkequal(False, 'hello', 'startswith', 'lo', 6) - self.checkequal(True, 'helloworld', 'startswith', 'lowo', 3) - 
self.checkequal(True, 'helloworld', 'startswith', 'lowo', 3, 7) - self.checkequal(False, 'helloworld', 'startswith', 'lowo', 3, 6) - - # test negative indices - self.checkequal(True, 'hello', 'startswith', 'he', 0, -1) - self.checkequal(True, 'hello', 'startswith', 'he', -53, -1) - self.checkequal(False, 'hello', 'startswith', 'hello', 0, -1) - self.checkequal(False, 'hello', 'startswith', 'hello world', -1, -10) - self.checkequal(False, 'hello', 'startswith', 'ello', -5) - self.checkequal(True, 'hello', 'startswith', 'ello', -4) - self.checkequal(False, 'hello', 'startswith', 'o', -2) - self.checkequal(True, 'hello', 'startswith', 'o', -1) - self.checkequal(True, 'hello', 'startswith', '', -3, -3) - self.checkequal(False, 'hello', 'startswith', 'lo', -9) - - self.checkraises(TypeError, 'hello', 'startswith') - self.checkraises(TypeError, 'hello', 'startswith', 42) - - # test tuple arguments - self.checkequal(True, 'hello', 'startswith', ('he', 'ha')) - self.checkequal(False, 'hello', 'startswith', ('lo', 'llo')) - self.checkequal(True, 'hello', 'startswith', ('hellox', 'hello')) - self.checkequal(False, 'hello', 'startswith', ()) - self.checkequal(True, 'helloworld', 'startswith', ('hellowo', - 'rld', 'lowo'), 3) - self.checkequal(False, 'helloworld', 'startswith', ('hellowo', 'ello', - 'rld'), 3) - self.checkequal(True, 'hello', 'startswith', ('lo', 'he'), 0, -1) - self.checkequal(False, 'hello', 'startswith', ('he', 'hel'), 0, 1) - self.checkequal(True, 'hello', 'startswith', ('he', 'hel'), 0, 2) - - self.checkraises(TypeError, 'hello', 'startswith', (42,)) - - def test_endswith(self): - self.checkequal(True, 'hello', 'endswith', 'lo') - self.checkequal(False, 'hello', 'endswith', 'he') - self.checkequal(True, 'hello', 'endswith', '') - self.checkequal(False, 'hello', 'endswith', 'hello world') - self.checkequal(False, 'helloworld', 'endswith', 'worl') - self.checkequal(True, 'helloworld', 'endswith', 'worl', 3, 9) - self.checkequal(True, 'helloworld', 
'endswith', 'world', 3, 12) - self.checkequal(True, 'helloworld', 'endswith', 'lowo', 1, 7) - self.checkequal(True, 'helloworld', 'endswith', 'lowo', 2, 7) - self.checkequal(True, 'helloworld', 'endswith', 'lowo', 3, 7) - self.checkequal(False, 'helloworld', 'endswith', 'lowo', 4, 7) - self.checkequal(False, 'helloworld', 'endswith', 'lowo', 3, 8) - self.checkequal(False, 'ab', 'endswith', 'ab', 0, 1) - self.checkequal(False, 'ab', 'endswith', 'ab', 0, 0) - - # test negative indices - self.checkequal(True, 'hello', 'endswith', 'lo', -2) - self.checkequal(False, 'hello', 'endswith', 'he', -2) - self.checkequal(True, 'hello', 'endswith', '', -3, -3) - self.checkequal(False, 'hello', 'endswith', 'hello world', -10, -2) - self.checkequal(False, 'helloworld', 'endswith', 'worl', -6) - self.checkequal(True, 'helloworld', 'endswith', 'worl', -5, -1) - self.checkequal(True, 'helloworld', 'endswith', 'worl', -5, 9) - self.checkequal(True, 'helloworld', 'endswith', 'world', -7, 12) - self.checkequal(True, 'helloworld', 'endswith', 'lowo', -99, -3) - self.checkequal(True, 'helloworld', 'endswith', 'lowo', -8, -3) - self.checkequal(True, 'helloworld', 'endswith', 'lowo', -7, -3) - self.checkequal(False, 'helloworld', 'endswith', 'lowo', 3, -4) - self.checkequal(False, 'helloworld', 'endswith', 'lowo', -8, -2) - - self.checkraises(TypeError, 'hello', 'endswith') - self.checkraises(TypeError, 'hello', 'endswith', 42) - - # test tuple arguments - self.checkequal(False, 'hello', 'endswith', ('he', 'ha')) - self.checkequal(True, 'hello', 'endswith', ('lo', 'llo')) - self.checkequal(True, 'hello', 'endswith', ('hellox', 'hello')) - self.checkequal(False, 'hello', 'endswith', ()) - self.checkequal(True, 'helloworld', 'endswith', ('hellowo', - 'rld', 'lowo'), 3) - self.checkequal(False, 'helloworld', 'endswith', ('hellowo', 'ello', - 'rld'), 3, -1) - self.checkequal(True, 'hello', 'endswith', ('hell', 'ell'), 0, -1) - self.checkequal(False, 'hello', 'endswith', ('he', 'hel'), 0, 1) - 
self.checkequal(True, 'hello', 'endswith', ('he', 'hell'), 0, 4) - - self.checkraises(TypeError, 'hello', 'endswith', (42,)) - - def test___contains__(self): - self.checkequal(True, '', '__contains__', '') - self.checkequal(True, 'abc', '__contains__', '') - self.checkequal(False, 'abc', '__contains__', '\0') - self.checkequal(True, '\0abc', '__contains__', '\0') - self.checkequal(True, 'abc\0', '__contains__', '\0') - self.checkequal(True, '\0abc', '__contains__', 'a') - self.checkequal(True, 'asdf', '__contains__', 'asdf') - self.checkequal(False, 'asd', '__contains__', 'asdf') - self.checkequal(False, '', '__contains__', 'asdf') - - def test_subscript(self): - self.checkequal('a', 'abc', '__getitem__', 0) - self.checkequal('c', 'abc', '__getitem__', -1) - self.checkequal('a', 'abc', '__getitem__', 0) - self.checkequal('abc', 'abc', '__getitem__', slice(0, 3)) - self.checkequal('abc', 'abc', '__getitem__', slice(0, 1000)) - self.checkequal('a', 'abc', '__getitem__', slice(0, 1)) - self.checkequal('', 'abc', '__getitem__', slice(0, 0)) - - self.checkraises(TypeError, 'abc', '__getitem__', 'def') - - def test_slice(self): - self.checkequal('abc', 'abc', '__getitem__', slice(0, 1000)) - self.checkequal('abc', 'abc', '__getitem__', slice(0, 3)) - self.checkequal('ab', 'abc', '__getitem__', slice(0, 2)) - self.checkequal('bc', 'abc', '__getitem__', slice(1, 3)) - self.checkequal('b', 'abc', '__getitem__', slice(1, 2)) - self.checkequal('', 'abc', '__getitem__', slice(2, 2)) - self.checkequal('', 'abc', '__getitem__', slice(1000, 1000)) - self.checkequal('', 'abc', '__getitem__', slice(2000, 1000)) - self.checkequal('', 'abc', '__getitem__', slice(2, 1)) - - self.checkraises(TypeError, 'abc', '__getitem__', 'def') - - def test_extended_getslice(self): - # Test extended slicing by comparing with list slicing. 
- s = string.ascii_letters + string.digits - indices = (0, None, 1, 3, 41, -1, -2, -37) - for start in indices: - for stop in indices: - # Skip step 0 (invalid) - for step in indices[1:]: - L = list(s)[start:stop:step] - self.checkequal("".join(L), s, '__getitem__', - slice(start, stop, step)) - - def test_mul(self): - self.checkequal('', 'abc', '__mul__', -1) - self.checkequal('', 'abc', '__mul__', 0) - self.checkequal('abc', 'abc', '__mul__', 1) - self.checkequal('abcabcabc', 'abc', '__mul__', 3) - self.checkraises(TypeError, 'abc', '__mul__') - self.checkraises(TypeError, 'abc', '__mul__', '') - # XXX: on a 64-bit system, this doesn't raise an overflow error, - # but either raises a MemoryError, or succeeds (if you have 54TiB) - #self.checkraises(OverflowError, 10000*'abc', '__mul__', 2000000000) - - def test_join(self): - # join now works with any sequence type - # moved here, because the argument order is - # different in string.join (see the test in - # test.test_string.StringTest.test_join) - self.checkequal('a b c d', ' ', 'join', ['a', 'b', 'c', 'd']) - self.checkequal('abcd', '', 'join', ('a', 'b', 'c', 'd')) - self.checkequal('bd', '', 'join', ('', 'b', '', 'd')) - self.checkequal('ac', '', 'join', ('a', '', 'c', '')) - self.checkequal('w x y z', ' ', 'join', Sequence()) - self.checkequal('abc', 'a', 'join', ('abc',)) - self.checkequal('z', 'a', 'join', UserList(['z'])) - self.checkequal('a.b.c', '.', 'join', ['a', 'b', 'c']) - self.assertRaises(TypeError, '.'.join, ['a', 'b', 3]) - for i in [5, 25, 125]: - self.checkequal(((('a' * i) + '-') * i)[:-1], '-', 'join', - ['a' * i] * i) - self.checkequal(((('a' * i) + '-') * i)[:-1], '-', 'join', - ('a' * i,) * i) - - #self.checkequal(str(BadSeq1()), ' ', 'join', BadSeq1()) - self.checkequal('a b c', ' ', 'join', BadSeq2()) - - self.checkraises(TypeError, ' ', 'join') - self.checkraises(TypeError, ' ', 'join', 7) - self.checkraises(TypeError, ' ', 'join', [1, 2, bytes()]) - try: - def f(): - yield 4 + "" - 
self.fixtype(' ').join(f()) - except TypeError as e: - if '+' not in str(e): - self.fail('join() ate exception message') - else: - self.fail('exception not raised') - - def test_formatting(self): - self.checkequal('+hello+', '+%s+', '__mod__', 'hello') - self.checkequal('+10+', '+%d+', '__mod__', 10) - self.checkequal('a', "%c", '__mod__', "a") - self.checkequal('a', "%c", '__mod__', "a") - self.checkequal('"', "%c", '__mod__', 34) - self.checkequal('$', "%c", '__mod__', 36) - self.checkequal('10', "%d", '__mod__', 10) - self.checkequal('\x7f', "%c", '__mod__', 0x7f) - - for ordinal in (-100, 0x200000): - # unicode raises ValueError, str raises OverflowError - self.checkraises((ValueError, OverflowError), '%c', '__mod__', ordinal) - - longvalue = sys.maxsize + 10 - slongvalue = str(longvalue) - self.checkequal(' 42', '%3ld', '__mod__', 42) - self.checkequal('42', '%d', '__mod__', 42.0) - self.checkequal(slongvalue, '%d', '__mod__', longvalue) - self.checkcall('%d', '__mod__', float(longvalue)) - self.checkequal('0042.00', '%07.2f', '__mod__', 42) - self.checkequal('0042.00', '%07.2F', '__mod__', 42) - - self.checkraises(TypeError, 'abc', '__mod__') - self.checkraises(TypeError, '%(foo)s', '__mod__', 42) - self.checkraises(TypeError, '%s%s', '__mod__', (42,)) - self.checkraises(TypeError, '%c', '__mod__', (None,)) - self.checkraises(ValueError, '%(foo', '__mod__', {}) - self.checkraises(TypeError, '%(foo)s %(bar)s', '__mod__', ('foo', 42)) - self.checkraises(TypeError, '%d', '__mod__', "42") # not numeric - self.checkraises(TypeError, '%d', '__mod__', (42+0j)) # no int conversion provided - - # argument names with properly nested brackets are supported - self.checkequal('bar', '%((foo))s', '__mod__', {'(foo)': 'bar'}) - - # 100 is a magic number in PyUnicode_Format, this forces a resize - self.checkequal(103*'a'+'x', '%sx', '__mod__', 103*'a') - - self.checkraises(TypeError, '%*s', '__mod__', ('foo', 'bar')) - self.checkraises(TypeError, '%10.*f', '__mod__', ('foo', 
42.)) - self.checkraises(ValueError, '%10', '__mod__', (42,)) - - # Outrageously large width or precision should raise ValueError. - self.checkraises(ValueError, '%%%df' % (2**64), '__mod__', (3.2)) - self.checkraises(ValueError, '%%.%df' % (2**64), '__mod__', (3.2)) - - self.checkraises(OverflowError, '%*s', '__mod__', - (_testcapi.PY_SSIZE_T_MAX + 1, '')) - self.checkraises(OverflowError, '%.*f', '__mod__', - (_testcapi.INT_MAX + 1, 1. / 7)) - # Issue 15989 - self.checkraises(OverflowError, '%*s', '__mod__', - (1 << (_testcapi.PY_SSIZE_T_MAX.bit_length() + 1), '')) - self.checkraises(OverflowError, '%.*f', '__mod__', - (_testcapi.UINT_MAX + 1, 1. / 7)) - - class X(object): pass - self.checkraises(TypeError, 'abc', '__mod__', X()) - - def test_floatformatting(self): - # float formatting - for prec in range(100): - format = '%%.%if' % prec - value = 0.01 - for x in range(60): - value = value * 3.14159265359 / 3.0 * 10.0 - self.checkcall(format, "__mod__", value) - - def test_inplace_rewrites(self): - # Check that strings don't copy and modify cached single-character strings - self.checkequal('a', 'A', 'lower') - self.checkequal(True, 'A', 'isupper') - self.checkequal('A', 'a', 'upper') - self.checkequal(True, 'a', 'islower') - - self.checkequal('a', 'A', 'replace', 'A', 'a') - self.checkequal(True, 'A', 'isupper') - - self.checkequal('A', 'a', 'capitalize') - self.checkequal(True, 'a', 'islower') - - self.checkequal('A', 'a', 'swapcase') - self.checkequal(True, 'a', 'islower') - - self.checkequal('A', 'a', 'title') - self.checkequal(True, 'a', 'islower') - - def test_partition(self): - - self.checkequal(('this is the par', 'ti', 'tion method'), - 'this is the partition method', 'partition', 'ti') - - # from raymond's original specification - S = 'http://www.python.org' - self.checkequal(('http', '://', 'www.python.org'), S, 'partition', '://') - self.checkequal(('http://www.python.org', '', ''), S, 'partition', '?') - self.checkequal(('', 'http://', 
'www.python.org'), S, 'partition', 'http://') - self.checkequal(('http://www.python.', 'org', ''), S, 'partition', 'org') - - self.checkraises(ValueError, S, 'partition', '') - self.checkraises(TypeError, S, 'partition', None) - - def test_rpartition(self): - - self.checkequal(('this is the rparti', 'ti', 'on method'), - 'this is the rpartition method', 'rpartition', 'ti') - - # from raymond's original specification - S = 'http://www.python.org' - self.checkequal(('http', '://', 'www.python.org'), S, 'rpartition', '://') - self.checkequal(('', '', 'http://www.python.org'), S, 'rpartition', '?') - self.checkequal(('', 'http://', 'www.python.org'), S, 'rpartition', 'http://') - self.checkequal(('http://www.python.', 'org', ''), S, 'rpartition', 'org') - - self.checkraises(ValueError, S, 'rpartition', '') - self.checkraises(TypeError, S, 'rpartition', None) - - def test_none_arguments(self): - # issue 11828 - s = 'hello' - self.checkequal(2, s, 'find', 'l', None) - self.checkequal(3, s, 'find', 'l', -2, None) - self.checkequal(2, s, 'find', 'l', None, -2) - self.checkequal(0, s, 'find', 'h', None, None) - - self.checkequal(3, s, 'rfind', 'l', None) - self.checkequal(3, s, 'rfind', 'l', -2, None) - self.checkequal(2, s, 'rfind', 'l', None, -2) - self.checkequal(0, s, 'rfind', 'h', None, None) - - self.checkequal(2, s, 'index', 'l', None) - self.checkequal(3, s, 'index', 'l', -2, None) - self.checkequal(2, s, 'index', 'l', None, -2) - self.checkequal(0, s, 'index', 'h', None, None) - - self.checkequal(3, s, 'rindex', 'l', None) - self.checkequal(3, s, 'rindex', 'l', -2, None) - self.checkequal(2, s, 'rindex', 'l', None, -2) - self.checkequal(0, s, 'rindex', 'h', None, None) - - self.checkequal(2, s, 'count', 'l', None) - self.checkequal(1, s, 'count', 'l', -2, None) - self.checkequal(1, s, 'count', 'l', None, -2) - self.checkequal(0, s, 'count', 'x', None, None) - - self.checkequal(True, s, 'endswith', 'o', None) - self.checkequal(True, s, 'endswith', 'lo', -2, None) - 
self.checkequal(True, s, 'endswith', 'l', None, -2) - self.checkequal(False, s, 'endswith', 'x', None, None) - - self.checkequal(True, s, 'startswith', 'h', None) - self.checkequal(True, s, 'startswith', 'l', -2, None) - self.checkequal(True, s, 'startswith', 'h', None, -2) - self.checkequal(False, s, 'startswith', 'x', None, None) - - def test_find_etc_raise_correct_error_messages(self): - # issue 11828 - s = 'hello' - x = 'x' - self.assertRaisesRegex(TypeError, r'^find\(', s.find, - x, None, None, None) - self.assertRaisesRegex(TypeError, r'^rfind\(', s.rfind, - x, None, None, None) - self.assertRaisesRegex(TypeError, r'^index\(', s.index, - x, None, None, None) - self.assertRaisesRegex(TypeError, r'^rindex\(', s.rindex, - x, None, None, None) - self.assertRaisesRegex(TypeError, r'^count\(', s.count, - x, None, None, None) - self.assertRaisesRegex(TypeError, r'^startswith\(', s.startswith, - x, None, None, None) - self.assertRaisesRegex(TypeError, r'^endswith\(', s.endswith, - x, None, None, None) - - # issue #15534 - self.checkequal(10, "...\u043c......<", "find", "<") - - -class MixinStrUnicodeTest(object): - # Additional tests that only work with str and unicode. - - def test_bug1001011(self): - # Make sure join returns a NEW object for single item sequences - # involving a subclass. - # Make sure that it is of the appropriate type. - # Check the optimisation still occurs for standard objects. - t = self.type2test - class subclass(t): - pass - s1 = subclass("abcd") - s2 = t().join([s1]) - self.assertIsNot(s1, s2) - self.assertIs(type(s2), t) - - s1 = t("abcd") - s2 = t().join([s1]) - self.assertIs(s1, s2) - - # Should also test mixed-type join. 
- if t is str: - s1 = subclass("abcd") - s2 = "".join([s1]) - self.assertIsNot(s1, s2) - self.assertIs(type(s2), t) - - s1 = t("abcd") - s2 = "".join([s1]) - self.assertIs(s1, s2) - -## elif t is str8: -## s1 = subclass("abcd") -## s2 = "".join([s1]) -## self.assertIsNot(s1, s2) -## self.assertIs(type(s2), str) # promotes! - -## s1 = t("abcd") -## s2 = "".join([s1]) -## self.assertIsNot(s1, s2) -## self.assertIs(type(s2), str) # promotes! - - else: - self.fail("unexpected type for MixinStrUnicodeTest %r" % t) - diff --git a/future/tests/base.py b/future/tests/base.py deleted file mode 100644 index e430684f..00000000 --- a/future/tests/base.py +++ /dev/null @@ -1,278 +0,0 @@ -import os -import tempfile -import unittest -if not hasattr(unittest, 'skip'): - import unittest2 as unittest - -from textwrap import dedent -import subprocess - -# For Python 2.6 compatibility: see http://stackoverflow.com/questions/4814970/ -if "check_output" not in dir(subprocess): # duck punch it in! - def f(*popenargs, **kwargs): - if 'stdout' in kwargs: - raise ValueError('stdout argument not allowed, it will be overridden.') - process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs) - output, unused_err = process.communicate() - retcode = process.poll() - if retcode: - cmd = kwargs.get("args") - if cmd is None: - cmd = popenargs[0] - raise subprocess.CalledProcessError(retcode, cmd) - return output - subprocess.check_output = f - -class CodeHandler(unittest.TestCase): - """ - Handy mixin for test classes for writing / reading / futurizing / - running .py files in the test suite. 
- """ - def setUp(self): - """ - The outputs from the various futurize stages should have the - following headers: - """ - # After stage1: - # TODO: use this form after implementing a fixer to consolidate - # __future__ imports into a single line: - # self.headers1 = """ - # from __future__ import absolute_import, division, print_function - # """ - self.headers1 = self.reformat(""" - from __future__ import absolute_import - from __future__ import division - from __future__ import print_function - """) - - # After stage2: - # TODO: use this form after implementing a fixer to consolidate - # __future__ imports into a single line: - # self.headers2 = """ - # from __future__ import (absolute_import, division, - # print_function, unicode_literals) - # from future import standard_library - # from future.builtins import * - # """ - self.headers2 = self.reformat(""" - from __future__ import absolute_import - from __future__ import division - from __future__ import print_function - from __future__ import unicode_literals - from future import standard_library - from future.builtins import * - """) - self.interpreters = ['python'] - self.tempdir = tempfile.mkdtemp() + os.path.sep - self.env = {'PYTHONPATH': os.getcwd()} - - def convert(self, code, stages=(1, 2), all_imports=False, from3=False, - reformat=True, tobytes=True, run=True): - """ - Converts the code block using ``futurize`` and returns the - resulting code. - - Passing stages=[1] or stages=[2] passes the flag ``--stage1`` or - ``stage2`` to ``futurize``. Passing both stages runs ``futurize`` - with both stages by default. - - If from3 is False, runs ``futurize`` in the default mode, - converting from Python 2 to both 2 and 3. If from3 is True, runs - ``futurize --from3`` to convert from Python 3 to both 2 and 3. - - Optionally reformats the code block first using the reformat() - method. - - If run is True, runs the resulting code under all Python - interpreters in self.interpreters. 
- """ - if reformat: - code = self.reformat(code) - self._write_test_script(code) - self._futurize_test_script(stages=stages, all_imports=all_imports, - from3=from3, tobytes=tobytes) - output = self._read_test_script() - if run: - for interpreter in self.interpreters: - _ = self._run_test_script(interpreter=interpreter) - return output - - def reformat(self, code): - """ - Removes any leading \n and dedents. - """ - if code.startswith('\n'): - code = code[1:] - return dedent(code) - - def check(self, output, expected, ignore_imports=True): - """ - Compares whether the code blocks are equal. If not, raises an - exception so the test fails. Ignores any trailing whitespace like - blank lines. - - If ignore_imports is True, passes the code blocks into the - strip_future_imports method. - """ - # self.assertEqual(expected.rstrip(), - # self.order_future_lines(output).rstrip()) - if ignore_imports: - output = self.strip_future_imports(output) - expected = self.strip_future_imports(expected) - self.assertEqual(self.order_future_lines(output.rstrip()), - expected.rstrip()) - - def strip_future_imports(self, code): - """ - Strips any of these import lines: - - from __future__ import - from future - from future. - - Limitation: doesn't handle imports split across multiple lines like - this: - - from __future__ import (absolute_import, division, print_function, - unicode_literals) - """ - output = [] - for line in code.splitlines(): - if not (line.startswith('from __future__ import ') - or line.startswith('from future ') - # but don't match "from future_builtins" :) - or line.startswith('from future.')): - output.append(line) - return '\n'.join(output) - - def convert_check(self, before, expected, stages=(1, 2), - all_imports=False, ignore_imports=True, from3=False, - tobytes=False, run=True): - """ - Convenience method that calls convert() and check(). - - Reformats the code blocks automatically using the reformat() - method. 
- - If all_imports is passed, we add the appropriate import headers - for the stage(s) selected to the ``expected`` code-block, so they - needn't appear repeatedly in the test code. - - If ignore_imports is True, ignores the presence of any lines - beginning: - - from __future__ import ... - from future import ... - - for the purpose of the comparison. - """ - output = self.convert(before, stages=stages, - all_imports=all_imports, from3=from3, - tobytes=tobytes, run=run) - if all_imports: - headers = self.headers2 if 2 in stages else self.headers1 - else: - headers = '' - - self.check(output, self.reformat(headers + expected), - ignore_imports=ignore_imports) - - def check_old(self, output, expected, stages=(1, 2), ignore_imports=True): - """ - Checks that the output is equal to the expected output, after - reformatting. - - Pass ``expected`` as a string (as a code block). It will be - reformatted and compared with the resulting code. We assert that - the output of the conversion of ``before`` with ``futurize`` is - equal to ``after``. Unless ignore_imports is True, the - appropriate headers for the stage(s) used are added automatically - for the comparison. - """ - headers = '' - # if not ignore_imports: - # if 2 in stages: - # headers = self.headers2 - # else: - # headers = self.headers1 - self.compare(output, headers + self.reformat(expected), - ignore_imports=ignore_imports) - - def order_future_lines(self, code): - """ - TODO: simplify this hideous code ... - - Returns the code block with any ``__future__`` import lines sorted, and - then any ``future`` import lines sorted. 
- """ - codelines = code.splitlines() - # Under under future lines: - uufuture_line_numbers = [i for i in range(len(codelines)) if codelines[i].startswith('from __future__ import ')] - sorted_uufuture_lines = sorted([codelines[i] for i in uufuture_line_numbers]) - - # future import lines: - future_line_numbers = [i for i in range(len(codelines)) if codelines[i].startswith('from future')] - sorted_future_lines = sorted([codelines[i] for i in future_line_numbers]) - - # Replace the old unsorted "from __future__ import ..." lines with the - # new sorted ones: - codelines2 = [] - for i in range(len(codelines)): - if i in uufuture_line_numbers: - codelines2.append(sorted_uufuture_lines[i]) - elif i in future_line_numbers: - codelines2.append(sorted_future_lines[i - len(uufuture_line_numbers)]) - else: - codelines2.append(codelines[i]) - return '\n'.join(codelines2) - - def unchanged(self, code, **kwargs): - """ - Convenience method to ensure the code is unchanged by the - futurize process. - """ - self.convert_check(code, code, **kwargs) - - def _write_test_script(self, code, filename='mytestscript.py'): - """ - Dedents the given code (a multiline string) and writes it out to - a file in a temporary folder like /tmp/tmpUDCn7x/mytestscript.py. 
- """ - with open(self.tempdir + filename, 'w') as f: - f.write(dedent(code)) - - def _read_test_script(self, filename='mytestscript.py'): - with open(self.tempdir + filename) as f: - newsource = f.read() - return newsource - - def _futurize_test_script(self, filename='mytestscript.py', stages=(1, 2), - all_imports=False, from3=False, tobytes=False): - params = [] - stages = list(stages) - if all_imports: - params.append('--all-imports') - if from3: - params.append('--from3') - if tobytes: - params.append('--tobytes') - if stages == [1]: - params.append('--stage1') - elif stages == [2]: - params.append('--stage2') - else: - assert stages == [1, 2] - # No extra params needed - - output = subprocess.check_output(['python', 'futurize.py'] + params + - ['-w', self.tempdir + filename], - stderr=subprocess.STDOUT) - return output - - def _run_test_script(self, filename='mytestscript.py', - interpreter='python'): - env = {'PYTHONPATH': os.getcwd()} - return subprocess.check_output([interpreter, self.tempdir + filename], - env=env) - - diff --git a/future/tests/disabled/test_bytes_from_py33.py b/future/tests/disabled/test_bytes_from_py33.py deleted file mode 100644 index 329e99da..00000000 --- a/future/tests/disabled/test_bytes_from_py33.py +++ /dev/null @@ -1,1425 +0,0 @@ -"""Unit tests for the bytes and bytearray types. - -XXX This is a mess. Common tests should be moved to buffer_tests.py, -which itself ought to be unified with string_tests.py (and the latter -should be modernized). -""" -from __future__ import (absolute_import, division, - print_function, unicode_literals) -from future import standard_library -from future.builtins import * - -import os -import re -import sys -import copy -import functools -import pickle -import tempfile -import unittest - -# Some Python installations (e.g. 
travis-ci.org Py2.7 and Py3.3) are -# missing the Python test suite (and there's no Ubuntu 12.04 package to -# install it), although a skeleton test package still exists with -# a couple of modules like test.support. We probably don't want to -# install standard library hooks ever on Py3, so the two imports below -# will fail. In this case, just exit. No probem: on Py3 we don't redefine -# bytes anyway. -try: - import test.support - import test.string_tests - import test.buffer_tests -except ImportError: - from future.utils import PY3 - if PY3: - sys.exit(0) - else: - raise - -if sys.flags.bytes_warning: - def check_bytes_warnings(func): - @functools.wraps(func) - def wrapper(*args, **kw): - with test.support.check_warnings(('', BytesWarning)): - return func(*args, **kw) - return wrapper -else: - # no-op - def check_bytes_warnings(func): - return func - - -class Indexable: - def __init__(self, value=0): - self.value = value - def __index__(self): - return self.value - - -class BaseBytesTest: - - def test_basics(self): - b = self.type2test() - self.assertEqual(type(b), self.type2test) - self.assertEqual(b.__class__, self.type2test) - - def test_copy(self): - a = self.type2test(b"abcd") - for copy_method in (copy.copy, copy.deepcopy): - b = copy_method(a) - self.assertEqual(a, b) - self.assertEqual(type(a), type(b)) - - def test_empty_sequence(self): - b = self.type2test() - self.assertEqual(len(b), 0) - self.assertRaises(IndexError, lambda: b[0]) - self.assertRaises(IndexError, lambda: b[1]) - self.assertRaises(IndexError, lambda: b[sys.maxsize]) - self.assertRaises(IndexError, lambda: b[sys.maxsize+1]) - self.assertRaises(IndexError, lambda: b[10**100]) - self.assertRaises(IndexError, lambda: b[-1]) - self.assertRaises(IndexError, lambda: b[-2]) - self.assertRaises(IndexError, lambda: b[-sys.maxsize]) - self.assertRaises(IndexError, lambda: b[-sys.maxsize-1]) - self.assertRaises(IndexError, lambda: b[-sys.maxsize-2]) - self.assertRaises(IndexError, lambda: 
b[-10**100]) - - def test_from_list(self): - ints = list(range(256)) - b = self.type2test(i for i in ints) - self.assertEqual(len(b), 256) - self.assertEqual(list(b), ints) - - def test_from_index(self): - b = self.type2test([Indexable(), Indexable(1), Indexable(254), - Indexable(255)]) - self.assertEqual(list(b), [0, 1, 254, 255]) - self.assertRaises(ValueError, self.type2test, [Indexable(-1)]) - self.assertRaises(ValueError, self.type2test, [Indexable(256)]) - - def test_from_ssize(self): - self.assertEqual(self.type2test(0), b'') - self.assertEqual(self.type2test(1), b'\x00') - self.assertEqual(self.type2test(5), b'\x00\x00\x00\x00\x00') - self.assertRaises(ValueError, self.type2test, -1) - - self.assertEqual(self.type2test('0', 'ascii'), b'0') - self.assertEqual(self.type2test(b'0'), b'0') - self.assertRaises(OverflowError, self.type2test, sys.maxsize + 1) - - def test_constructor_type_errors(self): - self.assertRaises(TypeError, self.type2test, 0.0) - class C: - pass - self.assertRaises(TypeError, self.type2test, ["0"]) - self.assertRaises(TypeError, self.type2test, [0.0]) - self.assertRaises(TypeError, self.type2test, [None]) - self.assertRaises(TypeError, self.type2test, [C()]) - - def test_constructor_value_errors(self): - self.assertRaises(ValueError, self.type2test, [-1]) - self.assertRaises(ValueError, self.type2test, [-sys.maxsize]) - self.assertRaises(ValueError, self.type2test, [-sys.maxsize-1]) - self.assertRaises(ValueError, self.type2test, [-sys.maxsize-2]) - self.assertRaises(ValueError, self.type2test, [-10**100]) - self.assertRaises(ValueError, self.type2test, [256]) - self.assertRaises(ValueError, self.type2test, [257]) - self.assertRaises(ValueError, self.type2test, [sys.maxsize]) - self.assertRaises(ValueError, self.type2test, [sys.maxsize+1]) - self.assertRaises(ValueError, self.type2test, [10**100]) - - def test_compare(self): - b1 = self.type2test([1, 2, 3]) - b2 = self.type2test([1, 2, 3]) - b3 = self.type2test([1, 3]) - - 
self.assertEqual(b1, b2) - self.assertTrue(b2 != b3) - self.assertTrue(b1 <= b2) - self.assertTrue(b1 <= b3) - self.assertTrue(b1 < b3) - self.assertTrue(b1 >= b2) - self.assertTrue(b3 >= b2) - self.assertTrue(b3 > b2) - - self.assertFalse(b1 != b2) - self.assertFalse(b2 == b3) - self.assertFalse(b1 > b2) - self.assertFalse(b1 > b3) - self.assertFalse(b1 >= b3) - self.assertFalse(b1 < b2) - self.assertFalse(b3 < b2) - self.assertFalse(b3 <= b2) - - @check_bytes_warnings - def test_compare_to_str(self): - # Byte comparisons with unicode should always fail! - # Test this for all expected byte orders and Unicode character - # sizes. - self.assertEqual(self.type2test(b"\0a\0b\0c") == "abc", False) - self.assertEqual(self.type2test(b"\0\0\0a\0\0\0b\0\0\0c") == "abc", - False) - self.assertEqual(self.type2test(b"a\0b\0c\0") == "abc", False) - self.assertEqual(self.type2test(b"a\0\0\0b\0\0\0c\0\0\0") == "abc", - False) - self.assertEqual(self.type2test() == str(), False) - self.assertEqual(self.type2test() != str(), True) - - def test_reversed(self): - input = list(map(ord, "Hello")) - b = self.type2test(input) - output = list(reversed(b)) - input.reverse() - self.assertEqual(output, input) - - def test_getslice(self): - def by(s): - return self.type2test(map(ord, s)) - b = by("Hello, world") - - self.assertEqual(b[:5], by("Hello")) - self.assertEqual(b[1:5], by("ello")) - self.assertEqual(b[5:7], by(", ")) - self.assertEqual(b[7:], by("world")) - self.assertEqual(b[7:12], by("world")) - self.assertEqual(b[7:100], by("world")) - - self.assertEqual(b[:-7], by("Hello")) - self.assertEqual(b[-11:-7], by("ello")) - self.assertEqual(b[-7:-5], by(", ")) - self.assertEqual(b[-5:], by("world")) - self.assertEqual(b[-5:12], by("world")) - self.assertEqual(b[-5:100], by("world")) - self.assertEqual(b[-100:5], by("Hello")) - - def test_extended_getslice(self): - # Test extended slicing by comparing with list slicing. 
- L = list(range(255)) - b = self.type2test(L) - indices = (0, None, 1, 3, 19, 100, -1, -2, -31, -100) - for start in indices: - for stop in indices: - # Skip step 0 (invalid) - for step in indices[1:]: - self.assertEqual(b[start:stop:step], self.type2test(L[start:stop:step])) - - def test_encoding(self): - sample = "Hello world\n\u1234\u5678\u9abc" - for enc in ("utf-8", "utf-16"): - b = self.type2test(sample, enc) - self.assertEqual(b, self.type2test(sample.encode(enc))) - self.assertRaises(UnicodeEncodeError, self.type2test, sample, "latin-1") - b = self.type2test(sample, "latin-1", "ignore") - self.assertEqual(b, self.type2test(sample[:-3], "utf-8")) - - def test_decode(self): - sample = "Hello world\n\u1234\u5678\u9abc\def0\def0" - for enc in ("utf-8", "utf-16"): - b = self.type2test(sample, enc) - self.assertEqual(b.decode(enc), sample) - sample = "Hello world\n\x80\x81\xfe\xff" - b = self.type2test(sample, "latin-1") - self.assertRaises(UnicodeDecodeError, b.decode, "utf-8") - self.assertEqual(b.decode("utf-8", "ignore"), "Hello world\n") - self.assertEqual(b.decode(errors="ignore", encoding="utf-8"), - "Hello world\n") - # Default encoding is utf-8 - self.assertEqual(self.type2test(b'\xe2\x98\x83').decode(), '\u2603') - - def test_from_int(self): - b = self.type2test(0) - self.assertEqual(b, self.type2test()) - b = self.type2test(10) - self.assertEqual(b, self.type2test([0]*10)) - b = self.type2test(10000) - self.assertEqual(b, self.type2test([0]*10000)) - - def test_concat(self): - b1 = self.type2test(b"abc") - b2 = self.type2test(b"def") - self.assertEqual(b1 + b2, b"abcdef") - self.assertEqual(b1 + bytes(b"def"), b"abcdef") - self.assertEqual(bytes(b"def") + b1, b"defabc") - self.assertRaises(TypeError, lambda: b1 + "def") - self.assertRaises(TypeError, lambda: "abc" + b2) - - def test_repeat(self): - for b in b"abc", self.type2test(b"abc"): - self.assertEqual(b * 3, b"abcabcabc") - self.assertEqual(b * 0, b"") - self.assertEqual(b * -1, b"") - 
self.assertRaises(TypeError, lambda: b * 3.14) - self.assertRaises(TypeError, lambda: 3.14 * b) - # XXX Shouldn't bytes and bytearray agree on what to raise? - with self.assertRaises((OverflowError, MemoryError)): - c = b * sys.maxsize - with self.assertRaises((OverflowError, MemoryError)): - b *= sys.maxsize - - def test_repeat_1char(self): - self.assertEqual(self.type2test(b'x')*100, self.type2test([ord('x')]*100)) - - def test_contains(self): - b = self.type2test(b"abc") - self.assertIn(ord('a'), b) - self.assertIn(int(ord('a')), b) - self.assertNotIn(200, b) - self.assertRaises(ValueError, lambda: 300 in b) - self.assertRaises(ValueError, lambda: -1 in b) - self.assertRaises(TypeError, lambda: None in b) - self.assertRaises(TypeError, lambda: float(ord('a')) in b) - self.assertRaises(TypeError, lambda: "a" in b) - for f in bytes, bytearray: - self.assertIn(f(b""), b) - self.assertIn(f(b"a"), b) - self.assertIn(f(b"b"), b) - self.assertIn(f(b"c"), b) - self.assertIn(f(b"ab"), b) - self.assertIn(f(b"bc"), b) - self.assertIn(f(b"abc"), b) - self.assertNotIn(f(b"ac"), b) - self.assertNotIn(f(b"d"), b) - self.assertNotIn(f(b"dab"), b) - self.assertNotIn(f(b"abd"), b) - - def test_fromhex(self): - self.assertRaises(TypeError, self.type2test.fromhex) - self.assertRaises(TypeError, self.type2test.fromhex, 1) - self.assertEqual(self.type2test.fromhex(''), self.type2test()) - b = bytearray([0x1a, 0x2b, 0x30]) - self.assertEqual(self.type2test.fromhex('1a2B30'), b) - self.assertEqual(self.type2test.fromhex(' 1A 2B 30 '), b) - self.assertEqual(self.type2test.fromhex('0000'), b'\0\0') - self.assertRaises(TypeError, self.type2test.fromhex, b'1B') - self.assertRaises(ValueError, self.type2test.fromhex, 'a') - self.assertRaises(ValueError, self.type2test.fromhex, 'rt') - self.assertRaises(ValueError, self.type2test.fromhex, '1a b cd') - self.assertRaises(ValueError, self.type2test.fromhex, '\x00') - self.assertRaises(ValueError, self.type2test.fromhex, '12 \x00 34') - - def 
test_join(self): - self.assertEqual(self.type2test(b"").join([]), b"") - self.assertEqual(self.type2test(b"").join([b""]), b"") - for lst in [[b"abc"], [b"a", b"bc"], [b"ab", b"c"], [b"a", b"b", b"c"]]: - lst = list(map(self.type2test, lst)) - self.assertEqual(self.type2test(b"").join(lst), b"abc") - self.assertEqual(self.type2test(b"").join(tuple(lst)), b"abc") - self.assertEqual(self.type2test(b"").join(iter(lst)), b"abc") - self.assertEqual(self.type2test(b".").join([b"ab", b"cd"]), b"ab.cd") - # XXX more... - - def test_count(self): - b = self.type2test(b'mississippi') - i = 105 - p = 112 - w = 119 - - self.assertEqual(b.count(b'i'), 4) - self.assertEqual(b.count(b'ss'), 2) - self.assertEqual(b.count(b'w'), 0) - - self.assertEqual(b.count(i), 4) - self.assertEqual(b.count(w), 0) - - self.assertEqual(b.count(b'i', 6), 2) - self.assertEqual(b.count(b'p', 6), 2) - self.assertEqual(b.count(b'i', 1, 3), 1) - self.assertEqual(b.count(b'p', 7, 9), 1) - - self.assertEqual(b.count(i, 6), 2) - self.assertEqual(b.count(p, 6), 2) - self.assertEqual(b.count(i, 1, 3), 1) - self.assertEqual(b.count(p, 7, 9), 1) - - def test_startswith(self): - b = self.type2test(b'hello') - self.assertFalse(self.type2test().startswith(b"anything")) - self.assertTrue(b.startswith(b"hello")) - self.assertTrue(b.startswith(b"hel")) - self.assertTrue(b.startswith(b"h")) - self.assertFalse(b.startswith(b"hellow")) - self.assertFalse(b.startswith(b"ha")) - with self.assertRaises(TypeError) as cm: - b.startswith([b'h']) - exc = str(cm.exception) - self.assertIn('bytes', exc) - self.assertIn('tuple', exc) - - def test_endswith(self): - b = self.type2test(b'hello') - self.assertFalse(bytearray().endswith(b"anything")) - self.assertTrue(b.endswith(b"hello")) - self.assertTrue(b.endswith(b"llo")) - self.assertTrue(b.endswith(b"o")) - self.assertFalse(b.endswith(b"whello")) - self.assertFalse(b.endswith(b"no")) - with self.assertRaises(TypeError) as cm: - b.endswith([b'o']) - exc = str(cm.exception) - 
self.assertIn('bytes', exc) - self.assertIn('tuple', exc) - - def test_find(self): - b = self.type2test(b'mississippi') - i = 105 - w = 119 - - self.assertEqual(b.find(b'ss'), 2) - self.assertEqual(b.find(b'w'), -1) - self.assertEqual(b.find(b'mississippian'), -1) - - self.assertEqual(b.find(i), 1) - self.assertEqual(b.find(w), -1) - - self.assertEqual(b.find(b'ss', 3), 5) - self.assertEqual(b.find(b'ss', 1, 7), 2) - self.assertEqual(b.find(b'ss', 1, 3), -1) - - self.assertEqual(b.find(i, 6), 7) - self.assertEqual(b.find(i, 1, 3), 1) - self.assertEqual(b.find(w, 1, 3), -1) - - for index in (-1, 256, sys.maxsize + 1): - self.assertRaisesRegex( - ValueError, r'byte must be in range\(0, 256\)', - b.find, index) - - def test_rfind(self): - b = self.type2test(b'mississippi') - i = 105 - w = 119 - - self.assertEqual(b.rfind(b'ss'), 5) - self.assertEqual(b.rfind(b'w'), -1) - self.assertEqual(b.rfind(b'mississippian'), -1) - - self.assertEqual(b.rfind(i), 10) - self.assertEqual(b.rfind(w), -1) - - self.assertEqual(b.rfind(b'ss', 3), 5) - self.assertEqual(b.rfind(b'ss', 0, 6), 2) - - self.assertEqual(b.rfind(i, 1, 3), 1) - self.assertEqual(b.rfind(i, 3, 9), 7) - self.assertEqual(b.rfind(w, 1, 3), -1) - - def test_index(self): - b = self.type2test(b'mississippi') - i = 105 - w = 119 - - self.assertEqual(b.index(b'ss'), 2) - self.assertRaises(ValueError, b.index, b'w') - self.assertRaises(ValueError, b.index, b'mississippian') - - self.assertEqual(b.index(i), 1) - self.assertRaises(ValueError, b.index, w) - - self.assertEqual(b.index(b'ss', 3), 5) - self.assertEqual(b.index(b'ss', 1, 7), 2) - self.assertRaises(ValueError, b.index, b'ss', 1, 3) - - self.assertEqual(b.index(i, 6), 7) - self.assertEqual(b.index(i, 1, 3), 1) - self.assertRaises(ValueError, b.index, w, 1, 3) - - def test_rindex(self): - b = self.type2test(b'mississippi') - i = 105 - w = 119 - - self.assertEqual(b.rindex(b'ss'), 5) - self.assertRaises(ValueError, b.rindex, b'w') - self.assertRaises(ValueError, 
b.rindex, b'mississippian') - - self.assertEqual(b.rindex(i), 10) - self.assertRaises(ValueError, b.rindex, w) - - self.assertEqual(b.rindex(b'ss', 3), 5) - self.assertEqual(b.rindex(b'ss', 0, 6), 2) - - self.assertEqual(b.rindex(i, 1, 3), 1) - self.assertEqual(b.rindex(i, 3, 9), 7) - self.assertRaises(ValueError, b.rindex, w, 1, 3) - - def test_replace(self): - b = self.type2test(b'mississippi') - self.assertEqual(b.replace(b'i', b'a'), b'massassappa') - self.assertEqual(b.replace(b'ss', b'x'), b'mixixippi') - - def test_split(self): - b = self.type2test(b'mississippi') - self.assertEqual(b.split(b'i'), [b'm', b'ss', b'ss', b'pp', b'']) - self.assertEqual(b.split(b'ss'), [b'mi', b'i', b'ippi']) - self.assertEqual(b.split(b'w'), [b]) - # with keyword args - b = self.type2test(b'a|b|c|d') - self.assertEqual(b.split(sep=b'|'), [b'a', b'b', b'c', b'd']) - self.assertEqual(b.split(b'|', maxsplit=1), [b'a', b'b|c|d']) - self.assertEqual(b.split(sep=b'|', maxsplit=1), [b'a', b'b|c|d']) - self.assertEqual(b.split(maxsplit=1, sep=b'|'), [b'a', b'b|c|d']) - b = self.type2test(b'a b c d') - self.assertEqual(b.split(maxsplit=1), [b'a', b'b c d']) - - def test_split_whitespace(self): - for b in (b' arf barf ', b'arf\tbarf', b'arf\nbarf', b'arf\rbarf', - b'arf\fbarf', b'arf\vbarf'): - b = self.type2test(b) - self.assertEqual(b.split(), [b'arf', b'barf']) - self.assertEqual(b.split(None), [b'arf', b'barf']) - self.assertEqual(b.split(None, 2), [b'arf', b'barf']) - for b in (b'a\x1Cb', b'a\x1Db', b'a\x1Eb', b'a\x1Fb'): - b = self.type2test(b) - self.assertEqual(b.split(), [b]) - self.assertEqual(self.type2test(b' a bb c ').split(None, 0), [b'a bb c ']) - self.assertEqual(self.type2test(b' a bb c ').split(None, 1), [b'a', b'bb c ']) - self.assertEqual(self.type2test(b' a bb c ').split(None, 2), [b'a', b'bb', b'c ']) - self.assertEqual(self.type2test(b' a bb c ').split(None, 3), [b'a', b'bb', b'c']) - - def test_split_string_error(self): - self.assertRaises(TypeError, 
self.type2test(b'a b').split, ' ') - - def test_split_unicodewhitespace(self): - b = self.type2test(b"\x09\x0A\x0B\x0C\x0D\x1C\x1D\x1E\x1F") - self.assertEqual(b.split(), [b'\x1c\x1d\x1e\x1f']) - - def test_rsplit(self): - b = self.type2test(b'mississippi') - self.assertEqual(b.rsplit(b'i'), [b'm', b'ss', b'ss', b'pp', b'']) - self.assertEqual(b.rsplit(b'ss'), [b'mi', b'i', b'ippi']) - self.assertEqual(b.rsplit(b'w'), [b]) - # with keyword args - b = self.type2test(b'a|b|c|d') - self.assertEqual(b.rsplit(sep=b'|'), [b'a', b'b', b'c', b'd']) - self.assertEqual(b.rsplit(b'|', maxsplit=1), [b'a|b|c', b'd']) - self.assertEqual(b.rsplit(sep=b'|', maxsplit=1), [b'a|b|c', b'd']) - self.assertEqual(b.rsplit(maxsplit=1, sep=b'|'), [b'a|b|c', b'd']) - b = self.type2test(b'a b c d') - self.assertEqual(b.rsplit(maxsplit=1), [b'a b c', b'd']) - - def test_rsplit_whitespace(self): - for b in (b' arf barf ', b'arf\tbarf', b'arf\nbarf', b'arf\rbarf', - b'arf\fbarf', b'arf\vbarf'): - b = self.type2test(b) - self.assertEqual(b.rsplit(), [b'arf', b'barf']) - self.assertEqual(b.rsplit(None), [b'arf', b'barf']) - self.assertEqual(b.rsplit(None, 2), [b'arf', b'barf']) - self.assertEqual(self.type2test(b' a bb c ').rsplit(None, 0), [b' a bb c']) - self.assertEqual(self.type2test(b' a bb c ').rsplit(None, 1), [b' a bb', b'c']) - self.assertEqual(self.type2test(b' a bb c ').rsplit(None, 2), [b' a', b'bb', b'c']) - self.assertEqual(self.type2test(b' a bb c ').rsplit(None, 3), [b'a', b'bb', b'c']) - - def test_rsplit_string_error(self): - self.assertRaises(TypeError, self.type2test(b'a b').rsplit, ' ') - - def test_rsplit_unicodewhitespace(self): - b = self.type2test(b"\x09\x0A\x0B\x0C\x0D\x1C\x1D\x1E\x1F") - self.assertEqual(b.rsplit(), [b'\x1c\x1d\x1e\x1f']) - - def test_partition(self): - b = self.type2test(b'mississippi') - self.assertEqual(b.partition(b'ss'), (b'mi', b'ss', b'issippi')) - self.assertEqual(b.partition(b'w'), (b'mississippi', b'', b'')) - - def test_rpartition(self): - b 
= self.type2test(b'mississippi') - self.assertEqual(b.rpartition(b'ss'), (b'missi', b'ss', b'ippi')) - self.assertEqual(b.rpartition(b'i'), (b'mississipp', b'i', b'')) - self.assertEqual(b.rpartition(b'w'), (b'', b'', b'mississippi')) - - def test_pickling(self): - for proto in range(pickle.HIGHEST_PROTOCOL + 1): - for b in b"", b"a", b"abc", b"\xffab\x80", b"\0\0\377\0\0": - b = self.type2test(b) - ps = pickle.dumps(b, proto) - q = pickle.loads(ps) - self.assertEqual(b, q) - - def test_iterator_pickling(self): - for b in b"", b"a", b"abc", b"\xffab\x80", b"\0\0\377\0\0": - it = itorg = iter(self.type2test(b)) - data = list(self.type2test(b)) - d = pickle.dumps(it) - it = pickle.loads(d) - self.assertEqual(type(itorg), type(it)) - self.assertEqual(list(it), data) - - it = pickle.loads(d) - try: - next(it) - except StopIteration: - continue - d = pickle.dumps(it) - it = pickle.loads(d) - self.assertEqual(list(it), data[1:]) - - def test_strip(self): - b = self.type2test(b'mississippi') - self.assertEqual(b.strip(b'i'), b'mississipp') - self.assertEqual(b.strip(b'm'), b'ississippi') - self.assertEqual(b.strip(b'pi'), b'mississ') - self.assertEqual(b.strip(b'im'), b'ssissipp') - self.assertEqual(b.strip(b'pim'), b'ssiss') - self.assertEqual(b.strip(b), b'') - - def test_lstrip(self): - b = self.type2test(b'mississippi') - self.assertEqual(b.lstrip(b'i'), b'mississippi') - self.assertEqual(b.lstrip(b'm'), b'ississippi') - self.assertEqual(b.lstrip(b'pi'), b'mississippi') - self.assertEqual(b.lstrip(b'im'), b'ssissippi') - self.assertEqual(b.lstrip(b'pim'), b'ssissippi') - - def test_rstrip(self): - b = self.type2test(b'mississippi') - self.assertEqual(b.rstrip(b'i'), b'mississipp') - self.assertEqual(b.rstrip(b'm'), b'mississippi') - self.assertEqual(b.rstrip(b'pi'), b'mississ') - self.assertEqual(b.rstrip(b'im'), b'mississipp') - self.assertEqual(b.rstrip(b'pim'), b'mississ') - - def test_strip_whitespace(self): - b = self.type2test(b' \t\n\r\f\vabc \t\n\r\f\v') - 
self.assertEqual(b.strip(), b'abc') - self.assertEqual(b.lstrip(), b'abc \t\n\r\f\v') - self.assertEqual(b.rstrip(), b' \t\n\r\f\vabc') - - def test_strip_bytearray(self): - self.assertEqual(self.type2test(b'abc').strip(memoryview(b'ac')), b'b') - self.assertEqual(self.type2test(b'abc').lstrip(memoryview(b'ac')), b'bc') - self.assertEqual(self.type2test(b'abc').rstrip(memoryview(b'ac')), b'ab') - - def test_strip_string_error(self): - self.assertRaises(TypeError, self.type2test(b'abc').strip, 'b') - self.assertRaises(TypeError, self.type2test(b'abc').lstrip, 'b') - self.assertRaises(TypeError, self.type2test(b'abc').rstrip, 'b') - - def test_center(self): - # Fill character can be either bytes or bytearray (issue 12380) - b = self.type2test(b'abc') - for fill_type in (bytes, bytearray): - self.assertEqual(b.center(7, fill_type(b'-')), - self.type2test(b'--abc--')) - - def test_ljust(self): - # Fill character can be either bytes or bytearray (issue 12380) - b = self.type2test(b'abc') - for fill_type in (bytes, bytearray): - self.assertEqual(b.ljust(7, fill_type(b'-')), - self.type2test(b'abc----')) - - def test_rjust(self): - # Fill character can be either bytes or bytearray (issue 12380) - b = self.type2test(b'abc') - for fill_type in (bytes, bytearray): - self.assertEqual(b.rjust(7, fill_type(b'-')), - self.type2test(b'----abc')) - - def test_ord(self): - b = self.type2test(b'\0A\x7f\x80\xff') - self.assertEqual([ord(b[i:i+1]) for i in range(len(b))], - [0, 65, 127, 128, 255]) - - def test_maketrans(self): - transtable = b'\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 
!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377' - self.assertEqual(self.type2test.maketrans(b'abc', b'xyz'), transtable) - transtable = b'\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374xyz' - self.assertEqual(self.type2test.maketrans(b'\375\376\377', b'xyz'), transtable) - self.assertRaises(ValueError, self.type2test.maketrans, b'abc', b'xyzq') - self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 'def') - - def test_none_arguments(self): - # issue 11828 - b = self.type2test(b'hello') - l = self.type2test(b'l') - h = self.type2test(b'h') - x = self.type2test(b'x') - o = self.type2test(b'o') - - self.assertEqual(2, b.find(l, None)) - self.assertEqual(3, b.find(l, -2, None)) - self.assertEqual(2, b.find(l, None, -2)) - 
self.assertEqual(0, b.find(h, None, None)) - - self.assertEqual(3, b.rfind(l, None)) - self.assertEqual(3, b.rfind(l, -2, None)) - self.assertEqual(2, b.rfind(l, None, -2)) - self.assertEqual(0, b.rfind(h, None, None)) - - self.assertEqual(2, b.index(l, None)) - self.assertEqual(3, b.index(l, -2, None)) - self.assertEqual(2, b.index(l, None, -2)) - self.assertEqual(0, b.index(h, None, None)) - - self.assertEqual(3, b.rindex(l, None)) - self.assertEqual(3, b.rindex(l, -2, None)) - self.assertEqual(2, b.rindex(l, None, -2)) - self.assertEqual(0, b.rindex(h, None, None)) - - self.assertEqual(2, b.count(l, None)) - self.assertEqual(1, b.count(l, -2, None)) - self.assertEqual(1, b.count(l, None, -2)) - self.assertEqual(0, b.count(x, None, None)) - - self.assertEqual(True, b.endswith(o, None)) - self.assertEqual(True, b.endswith(o, -2, None)) - self.assertEqual(True, b.endswith(l, None, -2)) - self.assertEqual(False, b.endswith(x, None, None)) - - self.assertEqual(True, b.startswith(h, None)) - self.assertEqual(True, b.startswith(l, -2, None)) - self.assertEqual(True, b.startswith(h, None, -2)) - self.assertEqual(False, b.startswith(x, None, None)) - - def test_integer_arguments_out_of_byte_range(self): - b = self.type2test(b'hello') - - for method in (b.count, b.find, b.index, b.rfind, b.rindex): - self.assertRaises(ValueError, method, -1) - self.assertRaises(ValueError, method, 256) - self.assertRaises(ValueError, method, 9999) - - def test_find_etc_raise_correct_error_messages(self): - # issue 11828 - b = self.type2test(b'hello') - x = self.type2test(b'x') - self.assertRaisesRegex(TypeError, r'\bfind\b', b.find, - x, None, None, None) - self.assertRaisesRegex(TypeError, r'\brfind\b', b.rfind, - x, None, None, None) - self.assertRaisesRegex(TypeError, r'\bindex\b', b.index, - x, None, None, None) - self.assertRaisesRegex(TypeError, r'\brindex\b', b.rindex, - x, None, None, None) - self.assertRaisesRegex(TypeError, r'\bcount\b', b.count, - x, None, None, None) - 
self.assertRaisesRegex(TypeError, r'\bstartswith\b', b.startswith, - x, None, None, None) - self.assertRaisesRegex(TypeError, r'\bendswith\b', b.endswith, - x, None, None, None) - - -@unittest.expectedFailure -class BytesTest(BaseBytesTest, unittest.TestCase): - type2test = bytes - - def test_buffer_is_readonly(self): - fd = os.dup(sys.stdin.fileno()) - with open(fd, "rb", buffering=0) as f: - self.assertRaises(TypeError, f.readinto, b"") - - def test_custom(self): - class A: - def __bytes__(self): - return b'abc' - self.assertEqual(bytes(A()), b'abc') - class A: pass - self.assertRaises(TypeError, bytes, A()) - class A: - def __bytes__(self): - return None - self.assertRaises(TypeError, bytes, A()) - class A: - def __bytes__(self): - return b'a' - def __index__(self): - return 42 - self.assertEqual(bytes(A()), b'a') - - # Test PyBytes_FromFormat() - def test_from_format(self): - test.support.import_module('ctypes') - from ctypes import pythonapi, py_object, c_int, c_char_p - PyBytes_FromFormat = pythonapi.PyBytes_FromFormat - PyBytes_FromFormat.restype = py_object - - self.assertEqual(PyBytes_FromFormat(b'format'), - b'format') - - self.assertEqual(PyBytes_FromFormat(b'%'), b'%') - self.assertEqual(PyBytes_FromFormat(b'%%'), b'%') - self.assertEqual(PyBytes_FromFormat(b'%%s'), b'%s') - self.assertEqual(PyBytes_FromFormat(b'[%%]'), b'[%]') - self.assertEqual(PyBytes_FromFormat(b'%%%c', c_int(ord('_'))), b'%_') - - self.assertEqual(PyBytes_FromFormat(b'c:%c', c_int(255)), - b'c:\xff') - self.assertEqual(PyBytes_FromFormat(b's:%s', c_char_p(b'cstr')), - b's:cstr') - - -@unittest.expectedFailure -class ByteArrayTest(BaseBytesTest, unittest.TestCase): - type2test = bytearray - - def test_nohash(self): - self.assertRaises(TypeError, hash, bytearray()) - - def test_bytearray_api(self): - short_sample = b"Hello world\n" - sample = short_sample + b"\0"*(20 - len(short_sample)) - tfn = tempfile.mktemp() - try: - # Prepare - with open(tfn, "wb") as f: - f.write(short_sample) 
- # Test readinto - with open(tfn, "rb") as f: - b = bytearray(20) - n = f.readinto(b) - self.assertEqual(n, len(short_sample)) - self.assertEqual(list(b), list(sample)) - # Test writing in binary mode - with open(tfn, "wb") as f: - f.write(b) - with open(tfn, "rb") as f: - self.assertEqual(f.read(), sample) - # Text mode is ambiguous; don't test - finally: - try: - os.remove(tfn) - except os.error: - pass - - def test_reverse(self): - b = bytearray(b'hello') - self.assertEqual(b.reverse(), None) - self.assertEqual(b, b'olleh') - b = bytearray(b'hello1') # test even number of items - b.reverse() - self.assertEqual(b, b'1olleh') - b = bytearray() - b.reverse() - self.assertFalse(b) - - def test_clear(self): - b = bytearray(b'python') - b.clear() - self.assertEqual(b, b'') - - b = bytearray(b'') - b.clear() - self.assertEqual(b, b'') - - b = bytearray(b'') - b.append(ord('r')) - b.clear() - b.append(ord('p')) - self.assertEqual(b, b'p') - - def test_copy(self): - b = bytearray(b'abc') - bb = b.copy() - self.assertEqual(bb, b'abc') - - b = bytearray(b'') - bb = b.copy() - self.assertEqual(bb, b'') - - # test that it's indeed a copy and not a reference - b = bytearray(b'abc') - bb = b.copy() - self.assertEqual(b, bb) - self.assertIsNot(b, bb) - bb.append(ord('d')) - self.assertEqual(bb, b'abcd') - self.assertEqual(b, b'abc') - - def test_regexps(self): - def by(s): - return bytearray(map(ord, s)) - b = by("Hello, world") - self.assertEqual(re.findall(br"\w+", b), [by("Hello"), by("world")]) - - def test_setitem(self): - b = bytearray([1, 2, 3]) - b[1] = 100 - self.assertEqual(b, bytearray([1, 100, 3])) - b[-1] = 200 - self.assertEqual(b, bytearray([1, 100, 200])) - b[0] = Indexable(10) - self.assertEqual(b, bytearray([10, 100, 200])) - try: - b[3] = 0 - self.fail("Didn't raise IndexError") - except IndexError: - pass - try: - b[-10] = 0 - self.fail("Didn't raise IndexError") - except IndexError: - pass - try: - b[0] = 256 - self.fail("Didn't raise ValueError") - except 
ValueError: - pass - try: - b[0] = Indexable(-1) - self.fail("Didn't raise ValueError") - except ValueError: - pass - try: - b[0] = None - self.fail("Didn't raise TypeError") - except TypeError: - pass - - def test_delitem(self): - b = bytearray(range(10)) - del b[0] - self.assertEqual(b, bytearray(range(1, 10))) - del b[-1] - self.assertEqual(b, bytearray(range(1, 9))) - del b[4] - self.assertEqual(b, bytearray([1, 2, 3, 4, 6, 7, 8])) - - def test_setslice(self): - b = bytearray(range(10)) - self.assertEqual(list(b), list(range(10))) - - b[0:5] = bytearray([1, 1, 1, 1, 1]) - self.assertEqual(b, bytearray([1, 1, 1, 1, 1, 5, 6, 7, 8, 9])) - - del b[0:-5] - self.assertEqual(b, bytearray([5, 6, 7, 8, 9])) - - b[0:0] = bytearray([0, 1, 2, 3, 4]) - self.assertEqual(b, bytearray(range(10))) - - b[-7:-3] = bytearray([100, 101]) - self.assertEqual(b, bytearray([0, 1, 2, 100, 101, 7, 8, 9])) - - b[3:5] = [3, 4, 5, 6] - self.assertEqual(b, bytearray(range(10))) - - b[3:0] = [42, 42, 42] - self.assertEqual(b, bytearray([0, 1, 2, 42, 42, 42, 3, 4, 5, 6, 7, 8, 9])) - - b[3:] = b'foo' - self.assertEqual(b, bytearray([0, 1, 2, 102, 111, 111])) - - b[:3] = memoryview(b'foo') - self.assertEqual(b, bytearray([102, 111, 111, 102, 111, 111])) - - b[3:4] = [] - self.assertEqual(b, bytearray([102, 111, 111, 111, 111])) - - for elem in [5, -5, 0, int(10e20), 'str', 2.3, - ['a', 'b'], [b'a', b'b'], [[]]]: - with self.assertRaises(TypeError): - b[3:4] = elem - - for elem in [[254, 255, 256], [-256, 9000]]: - with self.assertRaises(ValueError): - b[3:4] = elem - - def test_extended_set_del_slice(self): - indices = (0, None, 1, 3, 19, 300, 1<<333, -1, -2, -31, -300) - for start in indices: - for stop in indices: - # Skip invalid step 0 - for step in indices[1:]: - L = list(range(255)) - b = bytearray(L) - # Make sure we have a slice of exactly the right length, - # but with different data. 
- data = L[start:stop:step] - data.reverse() - L[start:stop:step] = data - b[start:stop:step] = data - self.assertEqual(b, bytearray(L)) - - del L[start:stop:step] - del b[start:stop:step] - self.assertEqual(b, bytearray(L)) - - def test_setslice_trap(self): - # This test verifies that we correctly handle assigning self - # to a slice of self (the old Lambert Meertens trap). - b = bytearray(range(256)) - b[8:] = b - self.assertEqual(b, bytearray(list(range(8)) + list(range(256)))) - - def test_iconcat(self): - b = bytearray(b"abc") - b1 = b - b += b"def" - self.assertEqual(b, b"abcdef") - self.assertEqual(b, b1) - self.assertTrue(b is b1) - b += b"xyz" - self.assertEqual(b, b"abcdefxyz") - try: - b += "" - except TypeError: - pass - else: - self.fail("bytes += unicode didn't raise TypeError") - - def test_irepeat(self): - b = bytearray(b"abc") - b1 = b - b *= 3 - self.assertEqual(b, b"abcabcabc") - self.assertEqual(b, b1) - self.assertTrue(b is b1) - - def test_irepeat_1char(self): - b = bytearray(b"x") - b1 = b - b *= 100 - self.assertEqual(b, b"x"*100) - self.assertEqual(b, b1) - self.assertTrue(b is b1) - - def test_alloc(self): - b = bytearray() - alloc = b.__alloc__() - self.assertTrue(alloc >= 0) - seq = [alloc] - for i in range(100): - b += b"x" - alloc = b.__alloc__() - self.assertTrue(alloc >= len(b)) - if alloc not in seq: - seq.append(alloc) - - def test_extend(self): - orig = b'hello' - a = bytearray(orig) - a.extend(a) - self.assertEqual(a, orig + orig) - self.assertEqual(a[5:], orig) - a = bytearray(b'') - # Test iterators that don't have a __length_hint__ - a.extend(map(int, orig * 25)) - a.extend(int(x) for x in orig * 25) - self.assertEqual(a, orig * 50) - self.assertEqual(a[-5:], orig) - a = bytearray(b'') - a.extend(iter(map(int, orig * 50))) - self.assertEqual(a, orig * 50) - self.assertEqual(a[-5:], orig) - a = bytearray(b'') - a.extend(list(map(int, orig * 50))) - self.assertEqual(a, orig * 50) - self.assertEqual(a[-5:], orig) - a = 
bytearray(b'') - self.assertRaises(ValueError, a.extend, [0, 1, 2, 256]) - self.assertRaises(ValueError, a.extend, [0, 1, 2, -1]) - self.assertEqual(len(a), 0) - a = bytearray(b'') - a.extend([Indexable(ord('a'))]) - self.assertEqual(a, b'a') - - def test_remove(self): - b = bytearray(b'hello') - b.remove(ord('l')) - self.assertEqual(b, b'helo') - b.remove(ord('l')) - self.assertEqual(b, b'heo') - self.assertRaises(ValueError, lambda: b.remove(ord('l'))) - self.assertRaises(ValueError, lambda: b.remove(400)) - self.assertRaises(TypeError, lambda: b.remove('e')) - # remove first and last - b.remove(ord('o')) - b.remove(ord('h')) - self.assertEqual(b, b'e') - self.assertRaises(TypeError, lambda: b.remove(b'e')) - b.remove(Indexable(ord('e'))) - self.assertEqual(b, b'') - - def test_pop(self): - b = bytearray(b'world') - self.assertEqual(b.pop(), ord('d')) - self.assertEqual(b.pop(0), ord('w')) - self.assertEqual(b.pop(-2), ord('r')) - self.assertRaises(IndexError, lambda: b.pop(10)) - self.assertRaises(IndexError, lambda: bytearray().pop()) - # test for issue #6846 - self.assertEqual(bytearray(b'\xff').pop(), 0xff) - - def test_nosort(self): - self.assertRaises(AttributeError, lambda: bytearray().sort()) - - def test_append(self): - b = bytearray(b'hell') - b.append(ord('o')) - self.assertEqual(b, b'hello') - self.assertEqual(b.append(100), None) - b = bytearray() - b.append(ord('A')) - self.assertEqual(len(b), 1) - self.assertRaises(TypeError, lambda: b.append(b'o')) - b = bytearray() - b.append(Indexable(ord('A'))) - self.assertEqual(b, b'A') - - def test_insert(self): - b = bytearray(b'msssspp') - b.insert(1, ord('i')) - b.insert(4, ord('i')) - b.insert(-2, ord('i')) - b.insert(1000, ord('i')) - self.assertEqual(b, b'mississippi') - self.assertRaises(TypeError, lambda: b.insert(0, b'1')) - b = bytearray() - b.insert(0, Indexable(ord('A'))) - self.assertEqual(b, b'A') - - def test_copied(self): - # Issue 4348. 
Make sure that operations that don't mutate the array - # copy the bytes. - b = bytearray(b'abc') - self.assertFalse(b is b.replace(b'abc', b'cde', 0)) - - t = bytearray([i for i in range(256)]) - x = bytearray(b'') - self.assertFalse(x is x.translate(t)) - - def test_partition_bytearray_doesnt_share_nullstring(self): - a, b, c = bytearray(b"x").partition(b"y") - self.assertEqual(b, b"") - self.assertEqual(c, b"") - self.assertTrue(b is not c) - b += b"!" - self.assertEqual(c, b"") - a, b, c = bytearray(b"x").partition(b"y") - self.assertEqual(b, b"") - self.assertEqual(c, b"") - # Same for rpartition - b, c, a = bytearray(b"x").rpartition(b"y") - self.assertEqual(b, b"") - self.assertEqual(c, b"") - self.assertTrue(b is not c) - b += b"!" - self.assertEqual(c, b"") - c, b, a = bytearray(b"x").rpartition(b"y") - self.assertEqual(b, b"") - self.assertEqual(c, b"") - - def test_resize_forbidden(self): - # #4509: can't resize a bytearray when there are buffer exports, even - # if it wouldn't reallocate the underlying buffer. - # Furthermore, no destructive changes to the buffer may be applied - # before raising the error. 
- b = bytearray(range(10)) - v = memoryview(b) - def resize(n): - b[1:-1] = range(n + 1, 2*n - 1) - resize(10) - orig = b[:] - self.assertRaises(BufferError, resize, 11) - self.assertEqual(b, orig) - self.assertRaises(BufferError, resize, 9) - self.assertEqual(b, orig) - self.assertRaises(BufferError, resize, 0) - self.assertEqual(b, orig) - # Other operations implying resize - self.assertRaises(BufferError, b.pop, 0) - self.assertEqual(b, orig) - self.assertRaises(BufferError, b.remove, b[1]) - self.assertEqual(b, orig) - def delitem(): - del b[1] - self.assertRaises(BufferError, delitem) - self.assertEqual(b, orig) - # deleting a non-contiguous slice - def delslice(): - b[1:-1:2] = b"" - self.assertRaises(BufferError, delslice) - self.assertEqual(b, orig) - - -@unittest.expectedFailure -class AssortedBytesTest(unittest.TestCase): - # - # Test various combinations of bytes and bytearray - # - - @check_bytes_warnings - def test_repr_str(self): - for f in str, repr: - self.assertEqual(f(bytearray()), "bytearray(b'')") - self.assertEqual(f(bytearray([0])), "bytearray(b'\\x00')") - self.assertEqual(f(bytearray([0, 1, 254, 255])), - "bytearray(b'\\x00\\x01\\xfe\\xff')") - self.assertEqual(f(b"abc"), "b'abc'") - self.assertEqual(f(b"'"), '''b"'"''') # ''' - self.assertEqual(f(b"'\""), r"""b'\'"'""") # ' - - def test_compare_bytes_to_bytearray(self): - self.assertEqual(b"abc" == bytes(b"abc"), True) - self.assertEqual(b"ab" != bytes(b"abc"), True) - self.assertEqual(b"ab" <= bytes(b"abc"), True) - self.assertEqual(b"ab" < bytes(b"abc"), True) - self.assertEqual(b"abc" >= bytes(b"ab"), True) - self.assertEqual(b"abc" > bytes(b"ab"), True) - - self.assertEqual(b"abc" != bytes(b"abc"), False) - self.assertEqual(b"ab" == bytes(b"abc"), False) - self.assertEqual(b"ab" > bytes(b"abc"), False) - self.assertEqual(b"ab" >= bytes(b"abc"), False) - self.assertEqual(b"abc" < bytes(b"ab"), False) - self.assertEqual(b"abc" <= bytes(b"ab"), False) - - self.assertEqual(bytes(b"abc") == 
b"abc", True) - self.assertEqual(bytes(b"ab") != b"abc", True) - self.assertEqual(bytes(b"ab") <= b"abc", True) - self.assertEqual(bytes(b"ab") < b"abc", True) - self.assertEqual(bytes(b"abc") >= b"ab", True) - self.assertEqual(bytes(b"abc") > b"ab", True) - - self.assertEqual(bytes(b"abc") != b"abc", False) - self.assertEqual(bytes(b"ab") == b"abc", False) - self.assertEqual(bytes(b"ab") > b"abc", False) - self.assertEqual(bytes(b"ab") >= b"abc", False) - self.assertEqual(bytes(b"abc") < b"ab", False) - self.assertEqual(bytes(b"abc") <= b"ab", False) - - @test.support.requires_docstrings - def test_doc(self): - self.assertIsNotNone(bytearray.__doc__) - self.assertTrue(bytearray.__doc__.startswith("bytearray("), bytearray.__doc__) - self.assertIsNotNone(bytes.__doc__) - self.assertTrue(bytes.__doc__.startswith("bytes("), bytes.__doc__) - - def test_from_bytearray(self): - sample = bytes(b"Hello world\n\x80\x81\xfe\xff") - buf = memoryview(sample) - b = bytearray(buf) - self.assertEqual(b, bytearray(sample)) - - @check_bytes_warnings - def test_to_str(self): - self.assertEqual(str(b''), "b''") - self.assertEqual(str(b'x'), "b'x'") - self.assertEqual(str(b'\x80'), "b'\\x80'") - self.assertEqual(str(bytearray(b'')), "bytearray(b'')") - self.assertEqual(str(bytearray(b'x')), "bytearray(b'x')") - self.assertEqual(str(bytearray(b'\x80')), "bytearray(b'\\x80')") - - def test_literal(self): - tests = [ - (b"Wonderful spam", "Wonderful spam"), - (br"Wonderful spam too", "Wonderful spam too"), - (b"\xaa\x00\000\200", "\xaa\x00\000\200"), - (br"\xaa\x00\000\200", r"\xaa\x00\000\200"), - ] - for b, s in tests: - self.assertEqual(b, bytearray(s, 'latin-1')) - for c in range(128, 256): - self.assertRaises(SyntaxError, eval, - 'b"%s"' % chr(c)) - - def test_translate(self): - b = b'hello' - ba = bytearray(b) - rosetta = bytearray(range(0, 256)) - rosetta[ord('o')] = ord('e') - c = b.translate(rosetta, b'l') - self.assertEqual(b, b'hello') - self.assertEqual(c, b'hee') - c = 
ba.translate(rosetta, b'l') - self.assertEqual(ba, b'hello') - self.assertEqual(c, b'hee') - c = b.translate(None, b'e') - self.assertEqual(c, b'hllo') - c = ba.translate(None, b'e') - self.assertEqual(c, b'hllo') - self.assertRaises(TypeError, b.translate, None, None) - self.assertRaises(TypeError, ba.translate, None, None) - - def test_split_bytearray(self): - self.assertEqual(b'a b'.split(memoryview(b' ')), [b'a', b'b']) - - def test_rsplit_bytearray(self): - self.assertEqual(b'a b'.rsplit(memoryview(b' ')), [b'a', b'b']) - - def test_return_self(self): - # bytearray.replace must always return a new bytearray - b = bytearray() - self.assertFalse(b.replace(b'', b'') is b) - - def test_compare(self): - if sys.flags.bytes_warning: - def bytes_warning(): - return test.support.check_warnings(('', BytesWarning)) - with bytes_warning(): - b'' == '' - with bytes_warning(): - b'' != '' - with bytes_warning(): - bytearray(b'') == '' - with bytes_warning(): - bytearray(b'') != '' - else: - self.skipTest("BytesWarning is needed for this test: use -bb option") - - # Optimizations: - # __iter__? (optimization) - # __reversed__? (optimization) - - # XXX More string methods? (Those that don't use character properties) - - # There are tests in string_tests.py that are more - # comprehensive for things like split, partition, etc. - # Unfortunately they are all bundled with tests that - # are not appropriate for bytes - - # I've started porting some of those into bytearray_tests.py, we should port - # the rest that make sense (the code can be cleaned up to use modern - # unittest methods at the same time). - -@unittest.expectedFailure -class BytearrayPEP3137Test(unittest.TestCase, - test.buffer_tests.MixinBytesBufferCommonTests): - def marshal(self, x): - return bytearray(x) - - def test_returns_new_copy(self): - val = self.marshal(b'1234') - # On immutable types these MAY return a reference to themselves - # but on mutable types like bytearray they MUST return a new copy. 
- for methname in ('zfill', 'rjust', 'ljust', 'center'): - method = getattr(val, methname) - newval = method(3) - self.assertEqual(val, newval) - self.assertTrue(val is not newval, - methname+' returned self on a mutable object') - for expr in ('val.split()[0]', 'val.rsplit()[0]', - 'val.partition(b".")[0]', 'val.rpartition(b".")[2]', - 'val.splitlines()[0]', 'val.replace(b"", b"")'): - newval = eval(expr) - self.assertEqual(val, newval) - self.assertTrue(val is not newval, - expr+' returned val on a mutable object') - -class FixedStringTest(test.string_tests.BaseTest): - - def fixtype(self, obj): - if isinstance(obj, str): - return obj.encode("utf-8") - return super().fixtype(obj) - - # Currently the bytes containment testing uses a single integer - # value. This may not be the final design, but until then the - # bytes section with in a bytes containment not valid - def test_contains(self): - pass - def test_expandtabs(self): - pass - def test_upper(self): - pass - def test_lower(self): - pass - -@unittest.expectedFailure -class ByteArrayAsStringTest(FixedStringTest, unittest.TestCase): - type2test = bytearray - contains_bytes = True - -@unittest.expectedFailure -class BytesAsStringTest(FixedStringTest, unittest.TestCase): - type2test = bytes - contains_bytes = True - - -class SubclassTest: - - def test_basic(self): - self.assertTrue(issubclass(self.subclass2test, self.type2test)) - self.assertIsInstance(self.subclass2test(), self.type2test) - - a, b = b"abcd", b"efgh" - _a, _b = self.subclass2test(a), self.subclass2test(b) - - # test comparison operators with subclass instances - self.assertTrue(_a == _a) - self.assertTrue(_a != _b) - self.assertTrue(_a < _b) - self.assertTrue(_a <= _b) - self.assertTrue(_b >= _a) - self.assertTrue(_b > _a) - self.assertTrue(_a is not a) - - # test concat of subclass instances - self.assertEqual(a + b, _a + _b) - self.assertEqual(a + b, a + _b) - self.assertEqual(a + b, _a + b) - - # test repeat - self.assertTrue(a*5 == _a*5) - 
- def test_join(self): - # Make sure join returns a NEW object for single item sequences - # involving a subclass. - # Make sure that it is of the appropriate type. - s1 = self.subclass2test(b"abcd") - s2 = self.type2test().join([s1]) - self.assertTrue(s1 is not s2) - self.assertTrue(type(s2) is self.type2test, type(s2)) - - # Test reverse, calling join on subclass - s3 = s1.join([b"abcd"]) - self.assertTrue(type(s3) is self.type2test) - - def test_pickle(self): - a = self.subclass2test(b"abcd") - a.x = 10 - a.y = self.subclass2test(b"efgh") - for proto in range(pickle.HIGHEST_PROTOCOL + 1): - b = pickle.loads(pickle.dumps(a, proto)) - self.assertNotEqual(id(a), id(b)) - self.assertEqual(a, b) - self.assertEqual(a.x, b.x) - self.assertEqual(a.y, b.y) - self.assertEqual(type(a), type(b)) - self.assertEqual(type(a.y), type(b.y)) - - def test_copy(self): - a = self.subclass2test(b"abcd") - a.x = 10 - a.y = self.subclass2test(b"efgh") - for copy_method in (copy.copy, copy.deepcopy): - b = copy_method(a) - self.assertNotEqual(id(a), id(b)) - self.assertEqual(a, b) - self.assertEqual(a.x, b.x) - self.assertEqual(a.y, b.y) - self.assertEqual(type(a), type(b)) - self.assertEqual(type(a.y), type(b.y)) - - -class ByteArraySubclass(bytearray): - pass - -class BytesSubclass(bytes): - pass - -@unittest.expectedFailure -class ByteArraySubclassTest(SubclassTest, unittest.TestCase): - type2test = bytearray - subclass2test = ByteArraySubclass - - def test_init_override(self): - class subclass(bytearray): - def __init__(me, newarg=1, *args, **kwargs): - bytearray.__init__(me, *args, **kwargs) - x = subclass(4, b"abcd") - x = subclass(4, source=b"abcd") - self.assertEqual(x, b"abcd") - x = subclass(newarg=4, source=b"abcd") - self.assertEqual(x, b"abcd") - - -@unittest.expectedFailure -class BytesSubclassTest(SubclassTest, unittest.TestCase): - type2test = bytes - subclass2test = BytesSubclass - - -if __name__ == "__main__": - unittest.main() diff --git 
a/future/tests/test_builtins.py b/future/tests/test_builtins.py deleted file mode 100644 index c24b295f..00000000 --- a/future/tests/test_builtins.py +++ /dev/null @@ -1,159 +0,0 @@ -""" -Tests to make sure the behaviour of the builtins is sensible and correct. -""" - -from __future__ import absolute_import, division, unicode_literals -from future.builtins import * -from future.utils import PY3 -from future.tests.base import unittest - -import textwrap -from subprocess import Popen, PIPE -from numbers import Integral -from decimal import Decimal - - -class TestBuiltins(unittest.TestCase): - def test_super(self): - class verbose_list(list): - ''' - A class that uses the new simpler super() function - ''' - def append(self, item): - print('Adding an item') - super().append(item) - - l = verbose_list() - l.append('blah') - self.assertEqual(l[0], 'blah') - self.assertEqual(len(l), 1) - self.assertTrue(isinstance(l, list)) - - def test_isinstance_int(self): - """ - Redefining ``int`` to a ``long`` subclass on Py2 makes this - test fail unless isinstance() is defined appropriately: - """ - self.assertTrue(isinstance(0, int)) - self.assertTrue(isinstance(int(1), int)) - self.assertFalse(isinstance(1.0, int)) - - def test_isinstance_Integral(self): - """ - Tests the preferred alternative to the above - """ - self.assertTrue(isinstance(0, Integral)) - - def test_isinstance_long(self): - """ - Py2's long doesn't inherit from int! 
- """ - self.assertTrue(isinstance(10**100, int)) - self.assertTrue(isinstance(int(2**64), int)) - if not PY3: - self.assertTrue(isinstance(long(1), int)) - # Note: the following is a SyntaxError on Py3: - # self.assertTrue(isinstance(1L, int)) - - def test_isinstance_bytes(self): - self.assertTrue(isinstance(b'byte-string', bytes)) - self.assertFalse(isinstance(b'byte-string', str)) - - def test_isinstance_str(self): - self.assertTrue(isinstance('string', str)) - self.assertTrue(isinstance(u'string', str)) - self.assertFalse(isinstance(u'string', bytes)) - - @unittest.expectedFailure - def test_type(self): - """ - The following fails when passed a unicode string on Python - (including when unicode_literals is in effect) and fails when - passed a byte-string on Python 3. So type() always wants a native - string as the first argument. - - TODO: maybe provide a replacement that works identically on Py2/3? - """ - mytype = type('blah', (dict,), {"old": 1, "new": 2}) - d = mytype() - self.assertTrue(isinstance(d, mytype)) - self.assertTrue(isinstance(d, dict)) - - def test_isinstance_tuple_of_types(self): - # These two should be equivalent, even if ``int`` is a special - # backported type. - label = 1 - self.assertTrue(isinstance(label, (float, Decimal)) or - isinstance(label, int)) - self.assertTrue(isinstance(label, (float, Decimal, int))) - self.assertTrue(isinstance(10**100, (float, Decimal, int))) - - self.assertTrue(isinstance(b'blah', (str, bytes))) - self.assertTrue(isinstance(b'blah', (bytes, float, int))) - - self.assertFalse(isinstance(b'blah', (str, Decimal, float, int))) - - self.assertTrue(isinstance('blah', (str, Decimal, float, int))) - self.assertTrue(isinstance(u'blah', (Decimal, float, int, str))) - - self.assertFalse(isinstance('blah', (bytes, Decimal, float, int))) - - @unittest.skipIf(sys.version_info[:2] == (2, 6), - 'not yet implemented for Py2.6') - def test_round(self): - """ - Note that the Python 2.x round() function fails these tests. 
The - Python 3.x round() function passes them, as should our custom - round() function. - """ - self.assertEqual(round(0.1250, 2), 0.12) - self.assertEqual(round(0.1350, 2), 0.14) - self.assertEqual(round(0.1251, 2), 0.13) - self.assertEqual(round(0.125000001, 2), 0.13) - self.assertEqual(round(123.5, 0), 124.0) - self.assertEqual(round(123.5), 124) - self.assertEqual(round(12.35, 2), 12.35) - self.assertEqual(round(12.35, 1), 12.3) - self.assertEqual(round(12.35, 0), 12.0) - self.assertEqual(round(123.5, 1), 123.5) - - self.assertTrue(isinstance(round(123.5, 0), float)) - self.assertTrue(isinstance(round(123.5), Integral)) - - @unittest.skip('negative ndigits not implemented yet') - def test_round_negative_ndigits(self): - self.assertEqual(round(10.1350, 0), 10.0) - self.assertEqual(round(10.1350, -1), 10.0) - self.assertEqual(round(10.1350, -2), 0.0) - self.assertEqual(round(10.1350, -3), 0.0) - - self.assertEqual(round(12.35, -1), 10.0) - self.assertEqual(round(12.35, -2), 0.0) - self.assertEqual(round(123.5, -1), 120.0) - self.assertEqual(round(123.5, -2), 100.0) - self.assertEqual(round(123.551, -2), 100.0) - self.assertEqual(round(123.551, -3), 0.0) - - def test_input(self, interpreter='python2'): - """ - Passes in a string to the waiting input() - """ - code = ''' - from future.builtins import input - def greet(name): - print "Hello, {0}!".format(name) - print "What's your name?" 
- name = input() - greet(name) - ''' - with open('mytestscript.py', 'w') as f: - f.write(textwrap.dedent(code)) - p1 = Popen([interpreter, 'mytestscript.py'], stdout=PIPE, stdin=PIPE, stderr=None) - (stdout, stderr) = p1.communicate(b'Ed') - # print(stdout) - # print(stderr) - self.assertEqual(stdout, b"What's your name?\nHello, Ed!\n") - - -if __name__ == '__main__': - unittest.main() diff --git a/future/tests/test_bytes.py b/future/tests/test_bytes.py deleted file mode 100644 index 882fcfa8..00000000 --- a/future/tests/test_bytes.py +++ /dev/null @@ -1,434 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Tests for the backported bytes object -""" - -from __future__ import absolute_import, unicode_literals, print_function -from future.builtins import * -from future import utils - -from numbers import Integral -from future.tests.base import unittest - - -TEST_UNICODE_STR = u'ℝεα∂@ßʟ℮ ☂ℯṧт υηḯ¢☺ḓ℮' -# Tk icon as a .gif: -TEST_BYTE_STR = b'GIF89a\x0e\x00\x0b\x00\x80\xff\x00\xff\x00\x00\xc0\xc0\xc0!\xf9\x04\x01\x00\x00\x01\x00,\x00\x00\x00\x00\x0e\x00\x0b\x00@\x02\x1f\x0c\x8e\x10\xbb\xcan\x90\x99\xaf&\xd8\x1a\xce\x9ar\x06F\xd7\xf1\x90\xa1c\x9e\xe8\x84\x99\x89\x97\xa2J\x01\x00;\x1a\x14\x00;;\xba\nD\x14\x00\x00;;' - - -class TestBytes(unittest.TestCase): - def test_bytes_encoding_arg(self): - """ - The bytes class has changed in Python 3 to accept an - additional argument in the constructor: encoding. - - It would be nice to support this without breaking the - isinstance(..., bytes) test below. 
- """ - u = u'Unicode string: \u5b54\u5b50' - b = bytes(u, encoding='utf-8') - self.assertEqual(b, u.encode('utf-8')) - - def test_bytes_string_no_encoding(self): - with self.assertRaises(TypeError): - bytes(u'ABC') - - def test_bytes_int(self): - """ - In Py3, bytes(int) -> bytes object of size given by the parameter initialized with null - """ - self.assertEqual(bytes(5), b'\x00\x00\x00\x00\x00') - # Test using newint: - self.assertEqual(bytes(int(5)), b'\x00\x00\x00\x00\x00') - self.assertTrue(isinstance(bytes(int(5)), bytes)) - - # Negative counts are not allowed in Py3: - with self.assertRaises(ValueError): - bytes(-1) - with self.assertRaises(ValueError): - bytes(int(-1)) - - @unittest.skipIf(utils.PY3, 'test not needed on Py3: all ints are long') - def test_bytes_long(self): - """ - As above, but explicitly feeding in a long on Py2. Note that - checks like: - isinstance(n, int) - are fragile on Py2, because isinstance(10L, int) is False. - """ - m = long(5) - n = long(-1) - self.assertEqual(bytes(m), b'\x00\x00\x00\x00\x00') - # Negative counts are not allowed in Py3: - with self.assertRaises(ValueError): - bytes(n) - - def test_bytes_empty(self): - """ - bytes() -> b'' - """ - self.assertEqual(bytes(), b'') - - def test_bytes_iterable_of_ints(self): - self.assertEqual(bytes([65, 66, 67]), b'ABC') - self.assertEqual(bytes([int(120), int(121), int(122)]), b'xyz') - - def test_bytes_bytes(self): - self.assertEqual(bytes(b'ABC'), b'ABC') - - def test_bytes_is_bytes(self): - b = bytes(b'ABC') - self.assertTrue(bytes(b) is b) - self.assertEqual(repr(bytes(b)), "b'ABC'") - - def test_bytes_fromhex(self): - self.assertEqual(bytes.fromhex('bb 0f'), b'\xbb\x0f') - self.assertEqual(bytes.fromhex('1234'), b'\x124') - self.assertEqual(bytes.fromhex('12ffa0'), b'\x12\xff\xa0') - b = b'My bytestring' - self.assertEqual(bytes(b).fromhex('bb 0f'), b'\xbb\x0f') - - def test_isinstance_bytes(self): - self.assertTrue(isinstance(bytes(b'blah'), bytes)) - - 
@unittest.expectedFailure - def test_isinstance_oldbytestrings_bytes(self): - """ - Watch out for this. Byte-strings produced in various places in Py2 - are of type 'str'. With 'from future.builtins import bytes', 'bytes' - is redefined to be a subclass of 'str', not just an alias for 'str'. - """ - self.assertTrue(isinstance(b'blah', bytes)) # not with the redefined bytes obj - self.assertTrue(isinstance(u'blah'.encode('utf-8'), bytes)) # not with the redefined bytes obj - - def test_bytes_getitem(self): - b = bytes(b'ABCD') - self.assertEqual(b[0], 65) - self.assertEqual(b[-1], 68) - self.assertEqual(b[0:1], b'A') - self.assertEqual(b[:], b'ABCD') - - @unittest.expectedFailure - def test_b_literal_creates_newbytes_object(self): - """ - It would nice if the b'' literal syntax could be coaxed into producing - bytes objects somehow ... ;) - """ - b = b'ABCD' - self.assertTrue(isinstance(b, bytes)) - self.assertEqual(b[0], 65) - self.assertTrue(repr(b).startswith('b')) - - def test_repr(self): - b = bytes(b'ABCD') - self.assertTrue(repr(b).startswith('b')) - - def test_str(self): - b = bytes(b'ABCD') - self.assertTrue(str(b), "b'ABCD'") - - def test_bytes_setitem(self): - b = b'ABCD' - with self.assertRaises(TypeError): - b[0] = b'B' - - def test_bytes_iteration(self): - b = bytes(b'ABCD') - for item in b: - self.assertTrue(isinstance(item, Integral)) - self.assertEqual(list(b), [65, 66, 67, 68]) - - def test_bytes_plus_unicode_string(self): - b = bytes(b'ABCD') - u = u'EFGH' - with self.assertRaises(TypeError): - b + u - - with self.assertRaises(TypeError): - u + b - - def test_bytes_plus_bytes(self): - b1 = bytes(b'ABCD') - b2 = b1 + b1 - self.assertEqual(b2, b'ABCDABCD') - self.assertTrue(isinstance(b2, bytes)) - - b3 = b1 + b'ZYXW' - self.assertEqual(b3, b'ABCDZYXW') - self.assertTrue(isinstance(b3, bytes)) - - b4 = b'ZYXW' + b1 - self.assertEqual(b4, b'ZYXWABCD') - self.assertTrue(isinstance(b4, bytes)) - - def test_bytes_join_bytes(self): - b = bytes(b' * ') - 
strings = [b'AB', b'EFGH', b'IJKL'] - result = b.join(strings) - self.assertEqual(result, b'AB * EFGH * IJKL') - self.assertTrue(isinstance(result, bytes)) - - def test_bytes_join_others(self): - b = bytes(b' ') - with self.assertRaises(TypeError): - b.join([42]) - with self.assertRaises(TypeError): - b.join(b'blah') - with self.assertRaises(TypeError): - b.join(bytes(b'blah')) - - def test_bytes_join_unicode_strings(self): - b = bytes(b'ABCD') - strings = [u'EFGH', u'IJKL'] - with self.assertRaises(TypeError): - b.join(strings) - - def test_bytes_replace(self): - b = bytes(b'ABCD') - c = b.replace(b'A', b'F') - self.assertEqual(c, b'FBCD') - self.assertTrue(isinstance(c, bytes)) - - with self.assertRaises(TypeError): - b.replace(b'A', u'F') - with self.assertRaises(TypeError): - b.replace(u'A', b'F') - - def test_bytes_partition(self): - b1 = bytes(b'ABCD') - parts = b1.partition(b'B') - self.assertEqual(parts, (b'A', b'B', b'CD')) - self.assertTrue(all([isinstance(p, bytes) for p in parts])) - - b2 = bytes(b'ABCDABCD') - parts = b2.partition(b'B') - self.assertEqual(parts, (b'A', b'B', b'CDABCD')) - - def test_bytes_rpartition(self): - b2 = bytes(b'ABCDABCD') - parts = b2.rpartition(b'B') - self.assertEqual(parts, (b'ABCDA', b'B', b'CD')) - self.assertTrue(all([isinstance(p, bytes) for p in parts])) - - def test_bytes_contains_something(self): - b = bytes(b'ABCD') - self.assertTrue(b'A' in b) - self.assertTrue(65 in b) - - self.assertTrue(b'AB' in b) - self.assertTrue(bytes([65, 66]) in b) - - self.assertFalse(b'AC' in b) - self.assertFalse(bytes([65, 67]) in b) - - self.assertFalse(b'Z' in b) - self.assertFalse(99 in b) - - with self.assertRaises(TypeError): - u'A' in b - - def test_bytes_index(self): - b = bytes(b'ABCD') - self.assertEqual(b.index(b'B'), 1) - self.assertEqual(b.index(67), 2) - - def test_startswith(self): - b = bytes(b'abcd') - self.assertTrue(b.startswith(b'a')) - self.assertTrue(b.startswith((b'a', b'b'))) - 
self.assertTrue(b.startswith(bytes(b'ab'))) - self.assertFalse(b.startswith((b'A', b'B'))) - - with self.assertRaises(TypeError) as cm: - b.startswith(65) - with self.assertRaises(TypeError) as cm: - b.startswith([b'A']) - exc = str(cm.exception) - # self.assertIn('bytes', exc) - # self.assertIn('tuple', exc) - - def test_endswith(self): - b = bytes(b'abcd') - self.assertTrue(b.endswith(b'd')) - self.assertTrue(b.endswith((b'c', b'd'))) - self.assertTrue(b.endswith(bytes(b'cd'))) - self.assertFalse(b.endswith((b'A', b'B'))) - - with self.assertRaises(TypeError) as cm: - b.endswith(65) - with self.assertRaises(TypeError) as cm: - b.endswith([b'D']) - exc = str(cm.exception) - # self.assertIn('bytes', exc) - # self.assertIn('tuple', exc) - - def test_decode(self): - b = bytes(b'abcd') - s = b.decode('utf-8') - self.assertEqual(s, 'abcd') - self.assertTrue(isinstance(s, str)) - - def test_encode(self): - b = bytes(b'abcd') - with self.assertRaises(AttributeError) as cm: - b.encode('utf-8') - - def test_eq(self): - """ - Equals: == - """ - b = bytes(b'ABCD') - self.assertEqual(b, b'ABCD') - self.assertTrue(b == b'ABCD') - self.assertEqual(b'ABCD', b) - self.assertEqual(b, b) - self.assertFalse(b == b'ABC') - self.assertFalse(b == bytes(b'ABC')) - self.assertFalse(b == u'ABCD') - self.assertFalse(b == str('ABCD')) - # Fails: - # self.assertFalse(u'ABCD' == b) - self.assertFalse(str('ABCD') == b) - - self.assertFalse(b == list(b)) - self.assertFalse(b == str(b)) - self.assertFalse(b == u'ABC') - self.assertFalse(bytes(b'Z') == 90) - - def test_ne(self): - b = bytes(b'ABCD') - self.assertFalse(b != b) - self.assertFalse(b != b'ABCD') - self.assertTrue(b != b'ABCDEFG') - self.assertTrue(b != bytes(b'ABCDEFG')) - self.assertTrue(b'ABCDEFG' != b) - - # self.assertTrue(b'ABCD' != u'ABCD') - self.assertTrue(b != u'ABCD') - self.assertTrue(b != u'ABCDE') - self.assertTrue(bytes(b'') != str(u'')) - self.assertTrue(str(u'') != bytes(b'')) - - self.assertTrue(b != list(b)) - 
self.assertTrue(b != str(b)) - - def test_hash(self): - d = {} - b = bytes(b'ABCD') - native_b = b'ABCD' - s = str('ABCD') - native_s = u'ABCD' - d[b] = b - d[s] = s - self.assertEqual(len(d), 2) - # This should overwrite d[s] but not d[b]: - d[native_s] = native_s - self.assertEqual(len(d), 2) - # This should overwrite d[native_s] again: - d[s] = s - self.assertEqual(len(d), 2) - self.assertEqual(set(d.keys()), set([s, b])) - - @unittest.expectedFailure - def test_hash_with_native_types(self): - # Warning: initializing the dict with native Py2 types throws the - # hashing out: - d = {u'ABCD': u'ABCD', b'ABCD': b'ABCD'} - # On Py2: len(d) == 1 - b = bytes(b'ABCD') - s = str('ABCD') - d[s] = s - d[b] = b - # Fails: - self.assertEqual(len(d) > 1) - - def test_add(self): - b = bytes(b'ABC') - c = bytes(b'XYZ') - d = b + c - self.assertTrue(isinstance(d, bytes)) - self.assertEqual(d, b'ABCXYZ') - f = b + b'abc' - self.assertTrue(isinstance(f, bytes)) - self.assertEqual(f, b'ABCabc') - g = b'abc' + b - self.assertTrue(isinstance(g, bytes)) - self.assertEqual(g, b'abcABC') - - def test_cmp(self): - b = bytes(b'ABC') - with self.assertRaises(TypeError): - b > 3 - with self.assertRaises(TypeError): - b > u'XYZ' - with self.assertRaises(TypeError): - b <= 3 - with self.assertRaises(TypeError): - b >= int(3) - with self.assertRaises(TypeError): - b < 3.3 - with self.assertRaises(TypeError): - b > (3.3 + 3j) - with self.assertRaises(TypeError): - b >= (1, 2) - with self.assertRaises(TypeError): - b <= [1, 2] - - def test_mul(self): - b = bytes(b'ABC') - c = b * 4 - self.assertTrue(isinstance(c, bytes)) - self.assertEqual(c, b'ABCABCABCABC') - d = b * int(4) - self.assertTrue(isinstance(d, bytes)) - self.assertEqual(d, b'ABCABCABCABC') - if utils.PY2: - e = b * long(4) - self.assertTrue(isinstance(e, bytes)) - self.assertEqual(e, b'ABCABCABCABC') - - def test_rmul(self): - b = bytes(b'XYZ') - c = 3 * b - self.assertTrue(isinstance(c, bytes)) - self.assertEqual(c, b'XYZXYZXYZ') 
- d = b * int(3) - self.assertTrue(isinstance(d, bytes)) - self.assertEqual(d, b'XYZXYZXYZ') - if utils.PY2: - e = long(3) * b - self.assertTrue(isinstance(e, bytes)) - self.assertEqual(e, b'XYZXYZXYZ') - - def test_slice(self): - b = bytes(b'ABCD') - c1 = b[:] - self.assertTrue(isinstance(c1, bytes)) - self.assertTrue(c1 == b) - # The following is not true, whereas it is true normally on Py2 and - # Py3. Does this matter?: - # self.assertTrue(c1 is b) - - c2 = b[10:] - self.assertTrue(isinstance(c2, bytes)) - self.assertTrue(c2 == bytes(b'')) - self.assertTrue(c2 == b'') - - c3 = b[:0] - self.assertTrue(isinstance(c3, bytes)) - self.assertTrue(c3 == bytes(b'')) - self.assertTrue(c3 == b'') - - c4 = b[:1] - self.assertTrue(isinstance(c4, bytes)) - self.assertTrue(c4 == bytes(b'A')) - self.assertTrue(c4 == b'A') - - c5 = b[:-1] - self.assertTrue(isinstance(c5, bytes)) - self.assertTrue(c5 == bytes(b'ABC')) - self.assertTrue(c5 == b'ABC') - - -if __name__ == '__main__': - unittest.main() diff --git a/future/tests/test_futurize.py b/future/tests/test_futurize.py deleted file mode 100644 index 19f86d7f..00000000 --- a/future/tests/test_futurize.py +++ /dev/null @@ -1,636 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import, division, print_function - -import pprint -from subprocess import Popen, PIPE -import os - -from future.tests.base import CodeHandler, unittest - - -class TestFuturizeSimple(CodeHandler): - """ - This class contains snippets of Python 2 code (invalid Python 3) and - tests for whether they can be passed to ``futurize`` and immediately - run under both Python 2 again and Python 3. - """ - - @unittest.expectedFailure - def test_problematic_string(self): - """ This string generates a SyntaxError on Python 3 unless it has - an r prefix. - """ - before = r""" - s = 'The folder is "C:\Users"'. - """ - after = r""" - s = r'The folder is "C:\Users"'. 
- """ - self.convert_check(before, after) - - def test_tobytes(self): - """ - The --tobytes option converts all UNADORNED string literals 'abcd' to b'abcd'. - It does apply to multi-line strings but doesn't apply if it's a raw - string, because ur'abcd' is a SyntaxError on Python 2 and br'abcd' is a - SyntaxError on Python 3. - """ - before = r""" - s0 = '1234' - s1 = '''5678 - ''' - s2 = "9abc" - # Unchanged: - s3 = r'1234' - s4 = R"defg" - s5 = u'hijk' - s6 = u"lmno" - s7 = b'lmno' - s8 = b"pqrs" - """ - after = r""" - s0 = b'1234' - s1 = b'''5678 - ''' - s2 = b"9abc" - # Unchanged: - s3 = r'1234' - s4 = R"defg" - s5 = u'hijk' - s6 = u"lmno" - s7 = b'lmno' - s8 = b"pqrs" - """ - self.convert_check(before, after, tobytes=True) - - @unittest.expectedFailure - def test_izip(self): - before = """ - from itertools import izip - for (a, b) in izip([1, 3, 5], [2, 4, 6]): - pass - """ - after = """ - from __future__ import unicode_literals - from future.builtins import zip - for (a, b) in zip([1, 3, 5], [2, 4, 6]): - pass - """ - self.convert_check(before, after, stages=(1, 2), ignore_imports=False) - - @unittest.expectedFailure - def test_no_unneeded_list_calls(self): - """ - TODO: get this working - """ - code = """ - for (a, b) in zip(range(3), range(3, 6)): - pass - """ - self.unchanged(code) - - def test_import_builtins(self): - before = """ - a = raw_input() - b = open(a, b, c) - c = filter(a, b) - d = map(a, b) - e = isinstance(a, str) - f = bytes(a, encoding='utf-8') - for g in xrange(10**10): - pass - super(MyClass, self) - """ - after = """ - from __future__ import unicode_literals - from future.builtins import bytes - from future.builtins import filter - from future.builtins import input - from future.builtins import map - from future.builtins import open - from future.builtins import range - from future.builtins import super - a = input() - b = open(a, b, c) - c = list(filter(a, b)) - d = list(map(a, b)) - e = isinstance(a, str) - f = bytes(a, 
encoding='utf-8') - for g in range(10**10): - pass - super(MyClass, self) - """ - self.convert_check(before, after, ignore_imports=False, run=False) - - def test_xrange(self): - code = ''' - for i in xrange(10): - pass - ''' - self.convert(code) - - @unittest.expectedFailure - def test_source_coding_utf8(self): - """ - Tests to ensure that the source coding line is not corrupted or - removed. It must be left as the first line in the file (including - before any __future__ imports). Also tests whether the unicode - characters in this encoding are parsed correctly and left alone. - """ - code = """ - # -*- coding: utf-8 -*- - icons = [u"◐", u"◓", u"◑", u"◒"] - """ - self.unchanged(code) - - def test_exception_syntax(self): - """ - Test of whether futurize handles the old-style exception syntax - """ - before = """ - try: - pass - except IOError, e: - val = e.errno - """ - after = """ - try: - pass - except IOError as e: - val = e.errno - """ - self.convert_check(before, after) - - def test_super(self): - """ - This tests whether futurize keeps the old two-argument super() calls the - same as before. It should, because this still works in Py3. - """ - code = ''' - class VerboseList(list): - def append(self, item): - print('Adding an item') - super(VerboseList, self).append(item) - ''' - self.unchanged(code) - - @unittest.expectedFailure - def test_file(self): - """ - file() as a synonym for open() is obsolete and invalid on Python 3. 
- """ - before = ''' - f = file(__file__) - data = f.read() - f.close() - ''' - after = ''' - f = open(__file__) - data = f.read() - f.close() - ''' - self.convert_check(before, after) - - def test_apply(self): - before = ''' - def addup(*x): - return sum(x) - - assert apply(addup, (10,20)) == 30 - ''' - after = """ - def addup(*x): - return sum(x) - - assert addup(*(10,20)) == 30 - """ - self.convert_check(before, after) - - @unittest.skip('not implemented yet') - def test_download_pypi_package_and_test(self, package_name='future'): - URL = 'http://pypi.python.org/pypi/{0}/json' - - import requests - r = requests.get(URL.format(package_name)) - pprint.pprint(r.json()) - - download_url = r.json()['urls'][0]['url'] - filename = r.json()['urls'][0]['filename'] - # r2 = requests.get(download_url) - # with open('/tmp/' + filename, 'w') as tarball: - # tarball.write(r2.content) - - def test_raw_input(self): - """ - Passes in a string to the waiting input() after futurize - conversion. - - The code is the first snippet from these docs: - http://docs.python.org/2/library/2to3.html - """ - before = """ - def greet(name): - print "Hello, {0}!".format(name) - print "What's your name?" - name = raw_input() - greet(name) - """ - desired = """ - def greet(name): - print("Hello, {0}!".format(name)) - print("What's your name?") - name = input() - greet(name) - """ - self.convert_check(before, desired, run=False) - - for interpreter in self.interpreters: - p1 = Popen([interpreter, self.tempdir + 'mytestscript.py'], - stdout=PIPE, stdin=PIPE, stderr=PIPE, env=self.env) - (stdout, stderr) = p1.communicate(b'Ed') - self.assertEqual(stdout, b"What's your name?\nHello, Ed!\n") - - def test_literal_prefixes_are_not_stripped(self): - """ - Tests to ensure that the u'' and b'' prefixes on unicode strings and - byte strings are not removed by the futurize script. 
Removing the - prefixes on Py3.3+ is unnecessary and loses some information -- namely, - that the strings have explicitly been marked as unicode or bytes, - rather than just e.g. a guess by some automated tool about what they - are. - """ - code = ''' - s = u'unicode string' - b = b'byte string' - ''' - self.unchanged(code) - - @unittest.expectedFailure - def test_division(self): - """ - TODO: implement this! - """ - before = """ - x = 1 / 2 - """ - after = """ - from future.utils import old_div - x = old_div(1, 2) - """ - self.convert_check(before, after, stages=[1]) - - -class TestFuturizeRenamedStdlib(CodeHandler): - def test_renamed_modules(self): - before = """ - import ConfigParser - import copy_reg - import cPickle - import cStringIO - - s = cStringIO.StringIO('blah') - """ - after = """ - import configparser - import copyreg - import pickle - import io - - s = io.StringIO('blah') - """ - self.convert_check(before, after) - - @unittest.expectedFailure - def test_urllib_refactor(self): - # Code like this using urllib is refactored by futurize --stage2 to use - # the new Py3 module names, but ``future`` doesn't support urllib yet. 
- before = """ - import urllib - - URL = 'http://pypi.python.org/pypi/future/json' - package_name = 'future' - r = urllib.urlopen(URL.format(package_name)) - data = r.read() - """ - after = """ - import urllib.request - - URL = 'http://pypi.python.org/pypi/future/json' - package_name = 'future' - r = urllib.request.urlopen(URL.format(package_name)) - data = r.read() - """ - self.convert_check(before, after) - - def test_renamed_copy_reg_and_cPickle_modules(self): - """ - Example from docs.python.org/2/library/copy_reg.html - """ - before = """ - import copy_reg - import copy - import cPickle - class C(object): - def __init__(self, a): - self.a = a - - def pickle_c(c): - print('pickling a C instance...') - return C, (c.a,) - - copy_reg.pickle(C, pickle_c) - c = C(1) - d = copy.copy(c) - p = cPickle.dumps(c) - """ - after = """ - import copyreg - import copy - import pickle - class C(object): - def __init__(self, a): - self.a = a - - def pickle_c(c): - print('pickling a C instance...') - return C, (c.a,) - - copyreg.pickle(C, pickle_c) - c = C(1) - d = copy.copy(c) - p = pickle.dumps(c) - """ - self.convert_check(before, after) - - @unittest.expectedFailure - def test_Py2_StringIO_module(self): - """ - Ideally, there would be a fixer for this. For now: - - TODO: add the Py3 equivalent for this to the docs - """ - before = """ - import cStringIO - s = cStringIO.StringIO('my string') - assert isinstance(s, cStringIO.InputType) - """ - after = """ - import io - s = io.StringIO('my string') - # assert isinstance(s, io.InputType) - # There is no io.InputType in Python 3. What should we change this to - # instead? - """ - self.convert_check(before, after) - - -class TestFuturizeStage1(CodeHandler): - """ - Tests "stage 1": safe optimizations: modernizing Python 2 code so that it - uses print functions, new-style exception syntax, etc. - - The behaviour should not change and this should introduce no dependency on - the ``future`` package. 
It produces more modern Python 2-only code. The - goal is to reduce the size of the real porting patch-set by performing - the uncontroversial patches first. - """ - - def test_apply(self): - """ - apply() should be changed by futurize --stage1 - """ - before = ''' - def f(a, b): - return a + b - - args = (1, 2) - assert apply(f, args) == 3 - assert apply(f, ('a', 'b')) == 'ab' - ''' - after = ''' - def f(a, b): - return a + b - - args = (1, 2) - assert f(*args) == 3 - assert f(*('a', 'b')) == 'ab' - ''' - self.convert_check(before, after, stages=[1]) - - def test_xrange(self): - """ - xrange should not be changed by futurize --stage1 - """ - code = ''' - for i in xrange(10): - pass - ''' - self.unchanged(code, stages=[1]) - - @unittest.expectedFailure - def test_absolute_import_changes(self): - """ - Implicit relative imports should be converted to absolute or explicit - relative imports correctly. - - Issue #16 (with porting bokeh/bbmodel.py) - """ - with open('specialmodels.py', 'w') as f: - f.write('pass') - - before = """ - import specialmodels.pandasmodel - specialmodels.pandasmodel.blah() - """ - after = """ - from __future__ import absolute_import - from .specialmodels import pandasmodel - pandasmodel.blah() - """ - self.convert_check(before, after, stages=[1]) - - def test_safe_futurize_imports(self): - """ - The standard library module names should not be changed until stage 2 - """ - before = """ - import ConfigParser - import HTMLParser - import collections - - ConfigParser.ConfigParser - HTMLParser.HTMLParser - d = collections.OrderedDict() - """ - self.unchanged(before, stages=[1]) - - def test_print(self): - before = """ - print 'Hello' - """ - after = """ - print('Hello') - """ - self.convert_check(before, after, stages=[1]) - - before = """ - import sys - print >> sys.stderr, 'Hello', 'world' - """ - after = """ - import sys - print('Hello', 'world', file=sys.stderr) - """ - self.convert_check(before, after, stages=[1]) - - def 
test_print_already_function(self): - """ - Running futurize --stage1 should not add a second set of parentheses - """ - before = """ - print('Hello') - """ - self.unchanged(before, stages=[1]) - - @unittest.expectedFailure - def test_print_already_function_complex(self): - """ - Running futurize --stage1 does add a second second set of parentheses - in this case. This is because the underlying lib2to3 has two distinct - grammars -- with a print statement and with a print function -- and, - when going forwards (2 to both), futurize assumes print is a statement, - which raises a ParseError. - """ - before = """ - import sys - print('Hello', 'world', file=sys.stderr) - """ - self.unchanged(before, stages=[1]) - - def test_exceptions(self): - before = """ - try: - raise AttributeError('blah') - except AttributeError, e: - pass - """ - after = """ - try: - raise AttributeError('blah') - except AttributeError as e: - pass - """ - self.convert_check(before, after, stages=[1]) - - @unittest.expectedFailure - def test_string_exceptions(self): - """ - 2to3 does not convert string exceptions: see - http://python3porting.com/differences.html. - """ - before = """ - try: - raise "old string exception" - except Exception, e: - pass - """ - after = """ - try: - raise Exception("old string exception") - except Exception as e: - pass - """ - self.convert_check(before, after, stages=[1]) - - @unittest.expectedFailure - def test_oldstyle_classes(self): - """ - We don't convert old-style classes to new-style automatically. Should we? 
- """ - before = """ - class Blah: - pass - """ - after = """ - class Blah(object): - pass - """ - self.convert_check(before, after, stages=[1]) - - @unittest.expectedFailure - def test_all(self): - """ - Standard library module names should not be changed in stage 1 - """ - before = """ - import ConfigParser - import HTMLParser - import collections - - print 'Hello' - try: - raise AttributeError('blah') - except AttributeError, e: - pass - print 'Number is', 1 / 2 - """ - after = """ - from future.utils import old_div - import Configparser - import HTMLParser - import collections - - print('Hello') - try: - raise AttributeError('blah') - except AttributeError as e: - pass - print('Number is', old_div(1, 2)) - """ - self.convert_check(before, after, stages=[1]) - - def test_octal_literals(self): - before = """ - mode = 0644 - """ - after = """ - mode = 0o644 - """ - self.convert_check(before, after) - - def test_long_int_literals(self): - before = """ - bignumber = 12345678901234567890L - """ - after = """ - bignumber = 12345678901234567890 - """ - self.convert_check(before, after) - - def test___future___import_position(self): - """ - Issue #4: __future__ imports inserted too low in file: SyntaxError - """ - code = """ - # Comments here - # and here - __version__=''' $Id$ ''' - __doc__="A Sequencer class counts things. It aids numbering and formatting lists." 
- __all__='Sequencer getSequencer setSequencer'.split() - # - # another comment - # - - CONSTANTS = [ 0, 01, 011, 0111, 012, 02, 021, 0211, 02111, 013 ] - _RN_LETTERS = "IVXLCDM" - - def my_func(value): - pass - - ''' Docstring-like comment here ''' - """ - self.convert(code) - - -if __name__ == '__main__': - unittest.main() diff --git a/future/tests/test_futurize_from3.py b/future/tests/test_futurize_from3.py deleted file mode 100644 index d7e9c29b..00000000 --- a/future/tests/test_futurize_from3.py +++ /dev/null @@ -1,155 +0,0 @@ -""" -This module contains snippets of Python 3 code (invalid Python 2) and -tests for whether they can be passed to ``futurize --from3`` and immediately -run under both Python 2 and Python 3. -""" - -from __future__ import print_function, absolute_import - -import pprint -from subprocess import Popen, PIPE -import tempfile -import os - -from future.tests.base import CodeHandler, unittest - - -class TestFuturizeFrom3(CodeHandler): - def test_range_slice(self): - """ - After running ``futurize --from3``, this Python 3 code should run on - both Py3 and Py2 without a MemoryError - """ - code = ''' - for i in range(10**15)[:10]: - pass - ''' - self.unchanged(code, from3=True) - - def test_print(self): - """ - This Python 3-only code is a SyntaxError on Py2 without the - print_function import from __future__. 
- """ - code = ''' - import sys - print('Hello', file=sys.stderr) - ''' - self.unchanged(code, from3=True) - - def test_division(self): - """ - True division should not be screwed up by conversion from 3 to both - """ - code = ''' - x = 3 / 2 - assert x == 1.5 - ''' - self.unchanged(code, from3=True) - - -class TestFuturizeAnnotations(CodeHandler): - @unittest.expectedFailure - def test_return_annotations_alone(self): - before = "def foo() -> 'bar': pass" - after = """ - def foo(): pass - foo.__annotations__ = {'return': 'bar'} - """ - self.check(before, after, from3=True) - - b = """ - def foo() -> "bar": - print "baz" - print "what's next, again?" - """ - a = """ - def foo(): - print "baz" - print "what's next, again?" - """ - self.check(b, a, from3=True) - - @unittest.expectedFailure - def test_single_param_annotations(self): - b = "def foo(bar:'baz'): pass" - a = """ - def foo(bar): pass - foo.__annotations__ = {'bar': 'baz'} - """ - self.check(b, a, from3=True) - - b = """ - def foo(bar:"baz"="spam"): - print("what's next, again?") - print("whatever.") - """ - a = """ - def foo(bar="spam"): - print("what's next, again?") - print("whatever.") - foo.__annotations__ = {'bar': 'baz'} - """ - self.check(b, a, from3=True) - - @unittest.expectedFailure - def test_multiple_param_annotations(self): - b = "def foo(bar:'spam'=False, baz:'eggs'=True, ham:False='spaghetti'): pass" - a = "def foo(bar=False, baz=True, ham='spaghetti'): pass" - self.check(b, a, from3=True) - - b = """ - def foo(bar:"spam"=False, baz:"eggs"=True, ham:False="spam"): - print("this is filler, just doing a suite") - print("suites require multiple lines.") - """ - a = """ - def foo(bar=False, baz=True, ham="spam"): - print("this is filler, just doing a suite") - print("suites require multiple lines.") - """ - self.check(b, a, from3=True) - - @unittest.expectedFailure - def test_mixed_annotations(self): - b = "def foo(bar=False, baz:'eggs'=True, ham:False='spaghetti') -> 'zombies': pass" - a = "def 
foo(bar=False, baz=True, ham='spaghetti'): pass" - self.check(b, a, from3=True) - - b = """ - def foo(bar:"spam"=False, baz=True, ham:False="spam") -> 'air': - print("this is filler, just doing a suite") - print("suites require multiple lines.") - """ - a = """ - def foo(bar=False, baz=True, ham="spam"): - print("this is filler, just doing a suite") - print("suites require multiple lines.") - """ - self.check(b, a, from3=True) - - b = "def foo(bar) -> 'brains': pass" - a = "def foo(bar): pass" - self.check(b, a, from3=True) - - def test_functions_unchanged(self): - s = "def foo(): pass" - self.unchanged(s, from3=True) - - s = """ - def foo(): - pass - pass - """ - self.unchanged(s, from3=True) - - s = """ - def foo(bar='baz'): - pass - pass - """ - self.unchanged(s, from3=True) - - -if __name__ == '__main__': - unittest.main() diff --git a/future/tests/test_httpservers.py b/future/tests/test_httpservers.py deleted file mode 100644 index 83e2d965..00000000 --- a/future/tests/test_httpservers.py +++ /dev/null @@ -1,715 +0,0 @@ -# coding: utf-8 - -"""Unittests for the various HTTPServer modules. - -From Python 3.3 - -Written by Cody A.W. Somerville , -Josip Dzolonga, and Michael Otteneder for the 2007/08 GHOP contest. 
-""" - -from __future__ import (absolute_import, division, - print_function, unicode_literals) -from future import standard_library -from future.builtins import * - -from http.server import BaseHTTPRequestHandler, HTTPServer, \ - SimpleHTTPRequestHandler, CGIHTTPRequestHandler -from http import server - -import os -import sys -import re -import base64 -import shutil -# Not ported yet: -# import urllib.parse -# Use this instead: -import urllib -import http.client -import tempfile -from io import BytesIO - -from test import support -from future.tests.base import unittest - -threading = support.import_module('threading') - - -class NoLogRequestHandler(object): - def log_message(self, *args): - # don't write log messages to stderr - pass - - def read(self, n=None): - return '' - - -class TestServerThread(threading.Thread): - def __init__(self, test_object, request_handler): - threading.Thread.__init__(self) - self.request_handler = request_handler - self.test_object = test_object - - def run(self): - self.server = HTTPServer(('localhost', 0), self.request_handler) - self.test_object.HOST, self.test_object.PORT = self.server.socket.getsockname() - self.test_object.server_started.set() - self.test_object = None - try: - self.server.serve_forever(0.05) - finally: - self.server.server_close() - - def stop(self): - self.server.shutdown() - - -class BaseTestCase(unittest.TestCase): - def setUp(self): - self._threads = support.threading_setup() - os.environ = support.EnvironmentVarGuard() - self.server_started = threading.Event() - self.thread = TestServerThread(self, self.request_handler) - self.thread.start() - self.server_started.wait() - - def tearDown(self): - self.thread.stop() - self.thread = None - os.environ.__exit__() - support.threading_cleanup(*self._threads) - - def request(self, uri, method='GET', body=None, headers={}): - self.connection = http.client.HTTPConnection(self.HOST, self.PORT) - self.connection.request(method, uri, body, headers) - return 
self.connection.getresponse() - - -class BaseHTTPServerTestCase(BaseTestCase): - class request_handler(NoLogRequestHandler, BaseHTTPRequestHandler): - protocol_version = 'HTTP/1.1' - default_request_version = 'HTTP/1.1' - - def do_TEST(self): - self.send_response(204) - self.send_header('Content-Type', 'text/html') - self.send_header('Connection', 'close') - self.end_headers() - - def do_KEEP(self): - self.send_response(204) - self.send_header('Content-Type', 'text/html') - self.send_header('Connection', 'keep-alive') - self.end_headers() - - def do_KEYERROR(self): - self.send_error(999) - - def do_CUSTOM(self): - self.send_response(999) - self.send_header('Content-Type', 'text/html') - self.send_header('Connection', 'close') - self.end_headers() - - def do_LATINONEHEADER(self): - self.send_response(999) - self.send_header('X-Special', 'Dängerous Mind') - self.send_header('Connection', 'close') - self.end_headers() - body = self.headers['x-special-incoming'].encode('utf-8') - self.wfile.write(body) - - def setUp(self): - BaseTestCase.setUp(self) - self.con = http.client.HTTPConnection(self.HOST, self.PORT) - self.con.connect() - - def test_command(self): - self.con.request('GET', '/') - res = self.con.getresponse() - self.assertEqual(res.status, 501) - - def test_request_line_trimming(self): - self.con._http_vsn_str = 'HTTP/1.1\n' - self.con.putrequest('GET', '/') - self.con.endheaders() - res = self.con.getresponse() - self.assertEqual(res.status, 501) - - def test_version_bogus(self): - self.con._http_vsn_str = 'FUBAR' - self.con.putrequest('GET', '/') - self.con.endheaders() - res = self.con.getresponse() - self.assertEqual(res.status, 400) - - def test_version_digits(self): - self.con._http_vsn_str = 'HTTP/9.9.9' - self.con.putrequest('GET', '/') - self.con.endheaders() - res = self.con.getresponse() - self.assertEqual(res.status, 400) - - def test_version_none_get(self): - self.con._http_vsn_str = '' - self.con.putrequest('GET', '/') - self.con.endheaders() - 
res = self.con.getresponse() - self.assertEqual(res.status, 501) - - def test_version_none(self): - self.con._http_vsn_str = '' - self.con.putrequest('PUT', '/') - self.con.endheaders() - res = self.con.getresponse() - self.assertEqual(res.status, 400) - - def test_version_invalid(self): - self.con._http_vsn = 99 - self.con._http_vsn_str = 'HTTP/9.9' - self.con.putrequest('GET', '/') - self.con.endheaders() - res = self.con.getresponse() - self.assertEqual(res.status, 505) - - def test_send_blank(self): - self.con._http_vsn_str = '' - self.con.putrequest('', '') - self.con.endheaders() - res = self.con.getresponse() - self.assertEqual(res.status, 400) - - def test_header_close(self): - self.con.putrequest('GET', '/') - self.con.putheader('Connection', 'close') - self.con.endheaders() - res = self.con.getresponse() - self.assertEqual(res.status, 501) - - def test_head_keep_alive(self): - self.con._http_vsn_str = 'HTTP/1.1' - self.con.putrequest('GET', '/') - self.con.putheader('Connection', 'keep-alive') - self.con.endheaders() - res = self.con.getresponse() - self.assertEqual(res.status, 501) - - def test_handler(self): - self.con.request('TEST', '/') - res = self.con.getresponse() - self.assertEqual(res.status, 204) - - def test_return_header_keep_alive(self): - self.con.request('KEEP', '/') - res = self.con.getresponse() - self.assertEqual(res.getheader('Connection'), 'keep-alive') - self.con.request('TEST', '/') - self.addCleanup(self.con.close) - - def test_internal_key_error(self): - self.con.request('KEYERROR', '/') - res = self.con.getresponse() - self.assertEqual(res.status, 999) - - def test_return_custom_status(self): - self.con.request('CUSTOM', '/') - res = self.con.getresponse() - self.assertEqual(res.status, 999) - - @unittest.skip('Unicode bug in Py2.7 email.parser.parsestr ?') - def test_latin1_header(self): - self.con.request('LATINONEHEADER', '/', headers={ - 'X-Special-Incoming': 'Ärger mit Unicode' - }) - res = self.con.getresponse() - 
self.assertEqual(res.getheader('X-Special'), 'Dängerous Mind') - self.assertEqual(res.read(), 'Ärger mit Unicode'.encode('utf-8')) - - -class SimpleHTTPServerTestCase(BaseTestCase): - class request_handler(NoLogRequestHandler, SimpleHTTPRequestHandler): - pass - - def setUp(self): - BaseTestCase.setUp(self) - self.cwd = os.getcwd() - basetempdir = tempfile.gettempdir() - os.chdir(basetempdir) - self.data = bytes(b'We are the knights who say Ni!') - self.tempdir = tempfile.mkdtemp(dir=basetempdir) - self.tempdir_name = os.path.basename(self.tempdir) - with open(os.path.join(self.tempdir, 'test'), 'wb') as temp: - temp.write(self.data) - - def tearDown(self): - try: - os.chdir(self.cwd) - try: - shutil.rmtree(self.tempdir) - except: - pass - finally: - BaseTestCase.tearDown(self) - - def check_status_and_reason(self, response, status, data=None): - body = response.read() - self.assertTrue(response) - self.assertEqual(response.status, status) - self.assertIsNotNone(response.reason) - if data: - self.assertEqual(data, body) - - def test_get(self): - #constructs the path relative to the root directory of the HTTPServer - response = self.request(self.tempdir_name + '/test') - self.check_status_and_reason(response, 200, data=self.data) - response = self.request(self.tempdir_name + '/') - self.check_status_and_reason(response, 200) - response = self.request(self.tempdir_name) - self.check_status_and_reason(response, 301) - response = self.request('/ThisDoesNotExist') - self.check_status_and_reason(response, 404) - response = self.request('/' + 'ThisDoesNotExist' + '/') - self.check_status_and_reason(response, 404) - with open(os.path.join(self.tempdir_name, 'index.html'), 'w') as f: - response = self.request('/' + self.tempdir_name + '/') - self.check_status_and_reason(response, 200) - # chmod() doesn't work as expected on Windows, and filesystem - # permissions are ignored by root on Unix. 
- if os.name == 'posix' and os.geteuid() != 0: - os.chmod(self.tempdir, 0) - response = self.request(self.tempdir_name + '/') - self.check_status_and_reason(response, 404) - os.chmod(self.tempdir, 0o755) - - def test_head(self): - response = self.request( - self.tempdir_name + '/test', method='HEAD') - self.check_status_and_reason(response, 200) - self.assertEqual(response.getheader('content-length'), - str(len(self.data))) - self.assertEqual(response.getheader('content-type'), - 'application/octet-stream') - - def test_invalid_requests(self): - response = self.request('/', method='FOO') - self.check_status_and_reason(response, 501) - # requests must be case sensitive,so this should fail too - response = self.request('/', method='get') - self.check_status_and_reason(response, 501) - response = self.request('/', method='GETs') - self.check_status_and_reason(response, 501) - - -cgi_file1 = """\ -#!%s - -print("Content-type: text/html") -print() -print("Hello World") -""" - -cgi_file2 = """\ -#!%s -import cgi - -print("Content-type: text/html") -print() - -form = cgi.FieldStorage() -print("%%s, %%s, %%s" %% (form.getfirst("spam"), form.getfirst("eggs"), - form.getfirst("bacon"))) -""" - - -@unittest.skipIf(hasattr(os, 'geteuid') and os.geteuid() == 0, - "This test can't be run reliably as root (issue #13308).") -class CGIHTTPServerTestCase(BaseTestCase): - class request_handler(NoLogRequestHandler, CGIHTTPRequestHandler): - pass - - linesep = os.linesep.encode('ascii') - - def setUp(self): - BaseTestCase.setUp(self) - self.cwd = os.getcwd() - self.parent_dir = tempfile.mkdtemp() - self.cgi_dir = os.path.join(self.parent_dir, 'cgi-bin') - os.mkdir(self.cgi_dir) - self.file1_path = None - self.file2_path = None - - # The shebang line should be pure ASCII: use symlink if possible. - # See issue #7668. 
- if support.can_symlink(): - self.pythonexe = os.path.join(self.parent_dir, 'python') - os.symlink(sys.executable, self.pythonexe) - else: - self.pythonexe = sys.executable - - try: - # The python executable path is written as the first line of the - # CGI Python script. The encoding cookie cannot be used, and so the - # path should be encodable to the default script encoding (utf-8) - self.pythonexe.encode('utf-8') - except UnicodeEncodeError: - self.tearDown() - self.skipTest("Python executable path is not encodable to utf-8") - - self.file1_path = os.path.join(self.cgi_dir, 'file1.py') - with open(self.file1_path, 'w', encoding='utf-8') as file1: - file1.write(cgi_file1 % self.pythonexe) - os.chmod(self.file1_path, 0o777) - - self.file2_path = os.path.join(self.cgi_dir, 'file2.py') - with open(self.file2_path, 'w', encoding='utf-8') as file2: - file2.write(cgi_file2 % self.pythonexe) - os.chmod(self.file2_path, 0o777) - - os.chdir(self.parent_dir) - - def tearDown(self): - try: - os.chdir(self.cwd) - if self.pythonexe != sys.executable: - os.remove(self.pythonexe) - if self.file1_path: - os.remove(self.file1_path) - if self.file2_path: - os.remove(self.file2_path) - os.rmdir(self.cgi_dir) - os.rmdir(self.parent_dir) - finally: - BaseTestCase.tearDown(self) - - def test_url_collapse_path(self): - # verify tail is the last portion and head is the rest on proper urls - test_vectors = { - '': '//', - '..': IndexError, - '/.//..': IndexError, - '/': '//', - '//': '//', - '/\\': '//\\', - '/.//': '//', - 'cgi-bin/file1.py': '/cgi-bin/file1.py', - '/cgi-bin/file1.py': '/cgi-bin/file1.py', - 'a': '//a', - '/a': '//a', - '//a': '//a', - './a': '//a', - './C:/': '/C:/', - '/a/b': '/a/b', - '/a/b/': '/a/b/', - '/a/b/.': '/a/b/', - '/a/b/c/..': '/a/b/', - '/a/b/c/../d': '/a/b/d', - '/a/b/c/../d/e/../f': '/a/b/d/f', - '/a/b/c/../d/e/../../f': '/a/b/f', - '/a/b/c/../d/e/.././././..//f': '/a/b/f', - '../a/b/c/../d/e/.././././..//f': IndexError, - '/a/b/c/../d/e/../../../f': 
'/a/f', - '/a/b/c/../d/e/../../../../f': '//f', - '/a/b/c/../d/e/../../../../../f': IndexError, - '/a/b/c/../d/e/../../../../f/..': '//', - '/a/b/c/../d/e/../../../../f/../.': '//', - } - for path, expected in test_vectors.items(): - if isinstance(expected, type) and issubclass(expected, Exception): - self.assertRaises(expected, - server._url_collapse_path, path) - else: - actual = server._url_collapse_path(path) - self.assertEqual(expected, actual, - msg='path = %r\nGot: %r\nWanted: %r' % - (path, actual, expected)) - - @unittest.expectedFailure - def test_headers_and_content(self): - res = self.request('/cgi-bin/file1.py') - self.assertEqual((b'Hello World' + self.linesep, 'text/html', 200), - (res.read(), res.getheader('Content-type'), res.status)) - - @unittest.expectedFailure - def test_post(self): - # Was: params = urllib.parse.urlencode( - params = urllib.urlencode( - {'spam' : 1, 'eggs' : 'python', 'bacon' : 123456}) - headers = {'Content-type' : 'application/x-www-form-urlencoded'} - res = self.request('/cgi-bin/file2.py', 'POST', params, headers) - - self.assertEqual(res.read(), b'1, python, 123456' + self.linesep) - - def test_invaliduri(self): - res = self.request('/cgi-bin/invalid') - res.read() - self.assertEqual(res.status, 404) - - @unittest.expectedFailure - def test_authorization(self): - headers = {bytes(b'Authorization') : bytes(b'Basic ') + - base64.b64encode(bytes(b'username:pass'))} - res = self.request('/cgi-bin/file1.py', 'GET', headers=headers) - self.assertEqual((b'Hello World' + self.linesep, 'text/html', 200), - (res.read(), res.getheader('Content-type'), res.status)) - - @unittest.expectedFailure - def test_no_leading_slash(self): - # http://bugs.python.org/issue2254 - res = self.request('cgi-bin/file1.py') - self.assertEqual((b'Hello World' + self.linesep, 'text/html', 200), - (res.read(), res.getheader('Content-type'), res.status)) - - @unittest.expectedFailure - def test_os_environ_is_not_altered(self): - signature = "Test CGI 
Server" - os.environ['SERVER_SOFTWARE'] = signature - res = self.request('/cgi-bin/file1.py') - self.assertEqual((b'Hello World' + self.linesep, 'text/html', 200), - (res.read(), res.getheader('Content-type'), res.status)) - self.assertEqual(os.environ['SERVER_SOFTWARE'], signature) - - -class SocketlessRequestHandler(SimpleHTTPRequestHandler, object): - def __init__(self): - self.get_called = False - self.protocol_version = "HTTP/1.1" - - def do_GET(self): - self.get_called = True - self.send_response(200) - self.send_header('Content-Type', 'text/html') - self.end_headers() - self.wfile.write(bytes(b'Data\r\n')) - - def log_message(self, format, *args): - pass - - -class RejectingSocketlessRequestHandler(SocketlessRequestHandler): - def handle_expect_100(self): - self.send_error(417) - return False - - -class AuditableBytesIO(object): - - def __init__(self): - self.datas = [] - - def write(self, data): - self.datas.append(data) - - def getData(self): - return bytes(b'').join(self.datas) - - @property - def numWrites(self): - return len(self.datas) - - -class BaseHTTPRequestHandlerTestCase(unittest.TestCase): - """Test the functionality of the BaseHTTPServer. - - Test the support for the Expect 100-continue header. 
- """ - - HTTPResponseMatch = re.compile(b'HTTP/1.[0-9]+ 200 OK') - - def setUp (self): - self.handler = SocketlessRequestHandler() - - def send_typical_request(self, message): - input = BytesIO(message) - output = BytesIO() - self.handler.rfile = input - self.handler.wfile = output - self.handler.handle_one_request() - output.seek(0) - return output.readlines() - - def verify_get_called(self): - self.assertTrue(self.handler.get_called) - - def verify_expected_headers(self, headers): - for fieldName in b'Server: ', b'Date: ', b'Content-Type: ': - self.assertEqual(sum(h.startswith(fieldName) for h in headers), 1) - - def verify_http_server_response(self, response): - match = self.HTTPResponseMatch.search(response) - self.assertTrue(match is not None) - - def test_http_1_1(self): - result = self.send_typical_request(bytes(b'GET / HTTP/1.1\r\n\r\n')) - self.verify_http_server_response(result[0]) - self.verify_expected_headers(result[1:-1]) - self.verify_get_called() - self.assertEqual(result[-1], b'Data\r\n') - - def test_http_1_0(self): - result = self.send_typical_request(bytes(b'GET / HTTP/1.0\r\n\r\n')) - self.verify_http_server_response(result[0]) - self.verify_expected_headers(result[1:-1]) - self.verify_get_called() - self.assertEqual(result[-1], b'Data\r\n') - - def test_http_0_9(self): - result = self.send_typical_request(bytes(b'GET / HTTP/0.9\r\n\r\n')) - self.assertEqual(len(result), 1) - self.assertEqual(result[0], b'Data\r\n') - self.verify_get_called() - - def test_with_continue_1_0(self): - result = self.send_typical_request(bytes(b'GET / HTTP/1.0\r\nExpect: 100-continue\r\n\r\n')) - self.verify_http_server_response(result[0]) - self.verify_expected_headers(result[1:-1]) - self.verify_get_called() - self.assertEqual(result[-1], b'Data\r\n') - - def test_with_continue_1_1(self): - result = self.send_typical_request(bytes(b'GET / HTTP/1.1\r\nExpect: 100-continue\r\n\r\n')) - self.assertEqual(result[0], b'HTTP/1.1 100 Continue\r\n') - 
self.assertEqual(result[1], b'HTTP/1.1 200 OK\r\n') - self.verify_expected_headers(result[2:-1]) - self.verify_get_called() - self.assertEqual(result[-1], b'Data\r\n') - - def test_header_buffering_of_send_error(self): - - input = BytesIO(bytes(b'GET / HTTP/1.1\r\n\r\n')) - output = AuditableBytesIO() - handler = SocketlessRequestHandler() - handler.rfile = input - handler.wfile = output - handler.request_version = 'HTTP/1.1' - handler.requestline = '' - handler.command = None - - handler.send_error(418) - self.assertEqual(output.numWrites, 2) - - def test_header_buffering_of_send_response_only(self): - - input = BytesIO(bytes(b'GET / HTTP/1.1\r\n\r\n')) - output = AuditableBytesIO() - handler = SocketlessRequestHandler() - handler.rfile = input - handler.wfile = output - handler.request_version = 'HTTP/1.1' - - handler.send_response_only(418) - self.assertEqual(output.numWrites, 0) - handler.end_headers() - self.assertEqual(output.numWrites, 1) - - def test_header_buffering_of_send_header(self): - - input = BytesIO(bytes(b'GET / HTTP/1.1\r\n\r\n')) - output = AuditableBytesIO() - handler = SocketlessRequestHandler() - handler.rfile = input - handler.wfile = output - handler.request_version = 'HTTP/1.1' - - handler.send_header('Foo', 'foo') - handler.send_header('bar', 'bar') - self.assertEqual(output.numWrites, 0) - handler.end_headers() - self.assertEqual(output.getData(), b'Foo: foo\r\nbar: bar\r\n\r\n') - self.assertEqual(output.numWrites, 1) - - def test_header_unbuffered_when_continue(self): - - def _readAndReseek(f): - pos = f.tell() - f.seek(0) - data = f.read() - f.seek(pos) - return data - - input = BytesIO(bytes(b'GET / HTTP/1.1\r\nExpect: 100-continue\r\n\r\n')) - output = BytesIO() - self.handler.rfile = input - self.handler.wfile = output - self.handler.request_version = 'HTTP/1.1' - - self.handler.handle_one_request() - self.assertNotEqual(_readAndReseek(output), b'') - result = _readAndReseek(output).split(bytes(b'\r\n')) - 
self.assertEqual(result[0], b'HTTP/1.1 100 Continue') - self.assertEqual(result[1], b'HTTP/1.1 200 OK') - - def test_with_continue_rejected(self): - usual_handler = self.handler # Save to avoid breaking any subsequent tests. - self.handler = RejectingSocketlessRequestHandler() - result = self.send_typical_request(bytes(b'GET / HTTP/1.1\r\nExpect: 100-continue\r\n\r\n')) - self.assertEqual(result[0], b'HTTP/1.1 417 Expectation Failed\r\n') - self.verify_expected_headers(result[1:-1]) - # The expect handler should short circuit the usual get method by - # returning false here, so get_called should be false - self.assertFalse(self.handler.get_called) - self.assertEqual(sum(r == b'Connection: close\r\n' for r in result[1:-1]), 1) - self.handler = usual_handler # Restore to avoid breaking any subsequent tests. - - def test_request_length(self): - # Issue #10714: huge request lines are discarded, to avoid Denial - # of Service attacks. - result = self.send_typical_request(bytes(b'GET ') + bytes(b'x') * 65537) - self.assertEqual(result[0], b'HTTP/1.1 414 Request-URI Too Long\r\n') - self.assertFalse(self.handler.get_called) - - def test_header_length(self): - # Issue #6791: same for headers - result = self.send_typical_request( - bytes(b'GET / HTTP/1.1\r\nX-Foo: bar') + bytes(b'r') * 65537 + bytes(b'\r\n\r\n')) - self.assertEqual(result[0], b'HTTP/1.1 400 Line too long\r\n') - self.assertFalse(self.handler.get_called) - - -class SimpleHTTPRequestHandlerTestCase(unittest.TestCase): - """ Test url parsing """ - def setUp(self): - self.translated = os.getcwd() - self.translated = os.path.join(self.translated, 'filename') - self.handler = SocketlessRequestHandler() - - def test_query_arguments(self): - path = self.handler.translate_path('/filename') - self.assertEqual(path, self.translated) - path = self.handler.translate_path('/filename?foo=bar') - self.assertEqual(path, self.translated) - path = self.handler.translate_path('/filename?a=b&spam=eggs#zot') - 
self.assertEqual(path, self.translated) - - def test_start_with_double_slash(self): - path = self.handler.translate_path('//filename') - self.assertEqual(path, self.translated) - path = self.handler.translate_path('//filename?foo=bar') - self.assertEqual(path, self.translated) - - -class DummyTest(unittest.TestCase): - """ - It might help on travis-ci to have at least one test being executed - for this module. - """ - def test_nothing(self): - self.assertTrue(True) - - -def test_main(verbose=None): - cwd = os.getcwd() - try: - support.run_unittest( - BaseHTTPRequestHandlerTestCase, - BaseHTTPServerTestCase, - SimpleHTTPServerTestCase, - CGIHTTPServerTestCase, - SimpleHTTPRequestHandlerTestCase, - ) - finally: - os.chdir(cwd) - -if __name__ == '__main__': - test_main() diff --git a/future/tests/test_int.py b/future/tests/test_int.py deleted file mode 100644 index 4d9fe587..00000000 --- a/future/tests/test_int.py +++ /dev/null @@ -1,420 +0,0 @@ -""" -int tests from Py3.3 -""" - -from __future__ import (absolute_import, division, - print_function, unicode_literals) -from future import standard_library, utils -from future.builtins import * -from future.tests.base import unittest - -import sys -import random -from test import support - - -L = [ - ('0', 0), - ('1', 1), - ('9', 9), - ('10', 10), - ('99', 99), - ('100', 100), - ('314', 314), - (' 314', 314), - ('314 ', 314), - (' \t\t 314 \t\t ', 314), - (repr(sys.maxsize), sys.maxsize), - (' 1x', ValueError), - (' 1 ', 1), - (' 1\02 ', ValueError), - ('', ValueError), - (' ', ValueError), - (' \t\t ', ValueError), - ("\u0200", ValueError) -] - -class IntTestCases(unittest.TestCase): - - def test_basic(self): - self.assertEqual(int(314), 314) - self.assertEqual(int(3.14), 3) - # Check that conversion from float truncates towards zero - self.assertEqual(int(-3.14), -3) - self.assertEqual(int(3.9), 3) - self.assertEqual(int(-3.9), -3) - self.assertEqual(int(3.5), 3) - self.assertEqual(int(-3.5), -3) - 
self.assertEqual(int("-3"), -3) - self.assertEqual(int(" -3 "), -3) - self.assertEqual(int("\N{EM SPACE}-3\N{EN SPACE}"), -3) - # Different base: - self.assertEqual(int("10",16), 16) - # Test conversion from strings and various anomalies - for s, v in L: - for sign in "", "+", "-": - for prefix in "", " ", "\t", " \t\t ": - ss = prefix + sign + s - vv = v - if sign == "-" and v is not ValueError: - vv = -v - try: - self.assertEqual(int(ss), vv) - except ValueError: - pass - - s = repr(-1-sys.maxsize) - x = int(s) - self.assertEqual(x+1, -sys.maxsize) - self.assertIsInstance(x, int) - # should return int - self.assertEqual(int(s[1:]), sys.maxsize+1) - - # should return int - x = int(1e100) - self.assertIsInstance(x, int) - x = int(-1e100) - self.assertIsInstance(x, int) - - - # SF bug 434186: 0x80000000/2 != 0x80000000>>1. - # Worked by accident in Windows release build, but failed in debug build. - # Failed in all Linux builds. - x = -1-sys.maxsize - self.assertEqual(x >> 1, x//2) - - self.assertRaises(ValueError, int, '123\0') - self.assertRaises(ValueError, int, '53', 40) - - # SF bug 1545497: embedded NULs were not detected with - # explicit base - self.assertRaises(ValueError, int, '123\0', 10) - self.assertRaises(ValueError, int, '123\x00 245', 20) - - x = int('1' * 600) - self.assertIsInstance(x, int) - - - self.assertRaises(TypeError, int, 1, 12) - - self.assertEqual(int('0o123', 0), 83) - self.assertEqual(int('0x123', 16), 291) - - # Bug 1679: "0x" is not a valid hex literal - self.assertRaises(ValueError, int, "0x", 16) - self.assertRaises(ValueError, int, "0x", 0) - - self.assertRaises(ValueError, int, "0o", 8) - self.assertRaises(ValueError, int, "0o", 0) - - self.assertRaises(ValueError, int, "0b", 2) - self.assertRaises(ValueError, int, "0b", 0) - - # SF bug 1334662: int(string, base) wrong answers - # Various representations of 2**32 evaluated to 0 - # rather than 2**32 in previous versions - - self.assertEqual(int('100000000000000000000000000000000', 
2), 4294967296) - self.assertEqual(int('102002022201221111211', 3), 4294967296) - self.assertEqual(int('10000000000000000', 4), 4294967296) - self.assertEqual(int('32244002423141', 5), 4294967296) - self.assertEqual(int('1550104015504', 6), 4294967296) - self.assertEqual(int('211301422354', 7), 4294967296) - self.assertEqual(int('40000000000', 8), 4294967296) - self.assertEqual(int('12068657454', 9), 4294967296) - self.assertEqual(int('4294967296', 10), 4294967296) - self.assertEqual(int('1904440554', 11), 4294967296) - self.assertEqual(int('9ba461594', 12), 4294967296) - self.assertEqual(int('535a79889', 13), 4294967296) - self.assertEqual(int('2ca5b7464', 14), 4294967296) - self.assertEqual(int('1a20dcd81', 15), 4294967296) - self.assertEqual(int('100000000', 16), 4294967296) - self.assertEqual(int('a7ffda91', 17), 4294967296) - self.assertEqual(int('704he7g4', 18), 4294967296) - self.assertEqual(int('4f5aff66', 19), 4294967296) - self.assertEqual(int('3723ai4g', 20), 4294967296) - self.assertEqual(int('281d55i4', 21), 4294967296) - self.assertEqual(int('1fj8b184', 22), 4294967296) - self.assertEqual(int('1606k7ic', 23), 4294967296) - self.assertEqual(int('mb994ag', 24), 4294967296) - self.assertEqual(int('hek2mgl', 25), 4294967296) - self.assertEqual(int('dnchbnm', 26), 4294967296) - self.assertEqual(int('b28jpdm', 27), 4294967296) - self.assertEqual(int('8pfgih4', 28), 4294967296) - self.assertEqual(int('76beigg', 29), 4294967296) - self.assertEqual(int('5qmcpqg', 30), 4294967296) - self.assertEqual(int('4q0jto4', 31), 4294967296) - self.assertEqual(int('4000000', 32), 4294967296) - self.assertEqual(int('3aokq94', 33), 4294967296) - self.assertEqual(int('2qhxjli', 34), 4294967296) - self.assertEqual(int('2br45qb', 35), 4294967296) - self.assertEqual(int('1z141z4', 36), 4294967296) - - # tests with base 0 - # this fails on 3.0, but in 2.x the old octal syntax is allowed - self.assertEqual(int(' 0o123 ', 0), 83) - self.assertEqual(int(' 0o123 ', 0), 83) - 
self.assertEqual(int('000', 0), 0) - self.assertEqual(int('0o123', 0), 83) - self.assertEqual(int('0x123', 0), 291) - self.assertEqual(int('0b100', 0), 4) - self.assertEqual(int(' 0O123 ', 0), 83) - self.assertEqual(int(' 0X123 ', 0), 291) - self.assertEqual(int(' 0B100 ', 0), 4) - - # without base still base 10 - self.assertEqual(int('0123'), 123) - self.assertEqual(int('0123', 10), 123) - - # tests with prefix and base != 0 - self.assertEqual(int('0x123', 16), 291) - self.assertEqual(int('0o123', 8), 83) - self.assertEqual(int('0b100', 2), 4) - self.assertEqual(int('0X123', 16), 291) - self.assertEqual(int('0O123', 8), 83) - self.assertEqual(int('0B100', 2), 4) - - # the code has special checks for the first character after the - # type prefix - self.assertRaises(ValueError, int, '0b2', 2) - self.assertRaises(ValueError, int, '0b02', 2) - self.assertRaises(ValueError, int, '0B2', 2) - self.assertRaises(ValueError, int, '0B02', 2) - self.assertRaises(ValueError, int, '0o8', 8) - self.assertRaises(ValueError, int, '0o08', 8) - self.assertRaises(ValueError, int, '0O8', 8) - self.assertRaises(ValueError, int, '0O08', 8) - self.assertRaises(ValueError, int, '0xg', 16) - self.assertRaises(ValueError, int, '0x0g', 16) - self.assertRaises(ValueError, int, '0Xg', 16) - self.assertRaises(ValueError, int, '0X0g', 16) - - # SF bug 1334662: int(string, base) wrong answers - # Checks for proper evaluation of 2**32 + 1 - self.assertEqual(int('100000000000000000000000000000001', 2), 4294967297) - self.assertEqual(int('102002022201221111212', 3), 4294967297) - self.assertEqual(int('10000000000000001', 4), 4294967297) - self.assertEqual(int('32244002423142', 5), 4294967297) - self.assertEqual(int('1550104015505', 6), 4294967297) - self.assertEqual(int('211301422355', 7), 4294967297) - self.assertEqual(int('40000000001', 8), 4294967297) - self.assertEqual(int('12068657455', 9), 4294967297) - self.assertEqual(int('4294967297', 10), 4294967297) - self.assertEqual(int('1904440555', 
11), 4294967297) - self.assertEqual(int('9ba461595', 12), 4294967297) - self.assertEqual(int('535a7988a', 13), 4294967297) - self.assertEqual(int('2ca5b7465', 14), 4294967297) - self.assertEqual(int('1a20dcd82', 15), 4294967297) - self.assertEqual(int('100000001', 16), 4294967297) - self.assertEqual(int('a7ffda92', 17), 4294967297) - self.assertEqual(int('704he7g5', 18), 4294967297) - self.assertEqual(int('4f5aff67', 19), 4294967297) - self.assertEqual(int('3723ai4h', 20), 4294967297) - self.assertEqual(int('281d55i5', 21), 4294967297) - self.assertEqual(int('1fj8b185', 22), 4294967297) - self.assertEqual(int('1606k7id', 23), 4294967297) - self.assertEqual(int('mb994ah', 24), 4294967297) - self.assertEqual(int('hek2mgm', 25), 4294967297) - self.assertEqual(int('dnchbnn', 26), 4294967297) - self.assertEqual(int('b28jpdn', 27), 4294967297) - self.assertEqual(int('8pfgih5', 28), 4294967297) - self.assertEqual(int('76beigh', 29), 4294967297) - self.assertEqual(int('5qmcpqh', 30), 4294967297) - self.assertEqual(int('4q0jto5', 31), 4294967297) - self.assertEqual(int('4000001', 32), 4294967297) - self.assertEqual(int('3aokq95', 33), 4294967297) - self.assertEqual(int('2qhxjlj', 34), 4294967297) - self.assertEqual(int('2br45qc', 35), 4294967297) - self.assertEqual(int('1z141z5', 36), 4294967297) - - @unittest.expectedFailure # fails on Py2 - @support.cpython_only - def test_small_ints(self): - # Bug #3236: Return small longs from PyLong_FromString - self.assertIs(int('10'), 10) - self.assertIs(int('-1'), -1) - self.assertIs(int(b'10'), 10) - self.assertIs(int(b'-1'), -1) - - def test_no_args(self): - self.assertEqual(int(), 0) - - def test_keyword_args(self): - # Test invoking int() using keyword arguments. 
- self.assertEqual(int(x=1.2), 1) - self.assertEqual(int('100', base=2), 4) - self.assertEqual(int(x='100', base=2), 4) - - @unittest.expectedFailure - def test_keyword_args_2(self): - # newint causes these to fail: - self.assertRaises(TypeError, int, base=10) - self.assertRaises(TypeError, int, base=0) - - def test_non_numeric_input_types(self): - # Test possible non-numeric types for the argument x, including - # subclasses of the explicitly documented accepted types. - class CustomStr(str): pass - class CustomBytes(bytes): pass - class CustomByteArray(bytearray): pass - - values = [b'100', - bytearray(b'100'), - CustomStr('100'), - CustomBytes(b'100'), - CustomByteArray(b'100')] - - for x in values: - msg = 'x has type %s' % type(x).__name__ - self.assertEqual(int(x), 100, msg=msg) - self.assertEqual(int(x, 2), 4, msg=msg) - - def test_string_float(self): - self.assertRaises(ValueError, int, '1.2') - - def test_intconversion(self): - # Test __int__() - class ClassicMissingMethods: - pass - # The following raises an AttributeError (for '__trunc__') on Py2 - # but a TypeError on Py3 (which uses new-style classes). - # Perhaps nothing is to be done but avoiding old-style classes! - # ... - # self.assertRaises(TypeError, int, ClassicMissingMethods()) - - class MissingMethods(object): - pass - self.assertRaises(TypeError, int, MissingMethods()) - - class Foo0: - def __int__(self): - return 42 - - class Foo1(object): - def __int__(self): - return 42 - - class Foo2(int): - def __int__(self): - return 42 - - class Foo3(int): - def __int__(self): - return self - - class Foo4(int): - def __int__(self): - return 42 - - class Foo5(int): - def __int__(self): - return 42. 
- - self.assertEqual(int(Foo0()), 42) - self.assertEqual(int(Foo1()), 42) - self.assertEqual(int(Foo2()), 42) - self.assertEqual(int(Foo3()), 0) - self.assertEqual(int(Foo4()), 42) - self.assertRaises(TypeError, int, Foo5()) - - class Classic: - pass - for base in (object, Classic): - class IntOverridesTrunc(base): - def __int__(self): - return 42 - def __trunc__(self): - return -12 - self.assertEqual(int(IntOverridesTrunc()), 42) - - class JustTrunc(base): - def __trunc__(self): - return 42 - self.assertEqual(int(JustTrunc()), 42) - - for trunc_result_base in (object, Classic): - class Integral(trunc_result_base): - def __int__(self): - return 42 - - class TruncReturnsNonInt(base): - def __trunc__(self): - return Integral() - self.assertEqual(int(TruncReturnsNonInt()), 42) - - class NonIntegral(trunc_result_base): - def __trunc__(self): - # Check that we avoid infinite recursion. - return NonIntegral() - - class TruncReturnsNonIntegral(base): - def __trunc__(self): - return NonIntegral() - try: - int(TruncReturnsNonIntegral()) - except TypeError as e: - # self.assertEqual(str(e), - # "__trunc__ returned non-Integral" - # " (type NonIntegral)") - pass - else: - self.fail("Failed to raise TypeError with %s" % - ((base, trunc_result_base),)) - - # Regression test for bugs.python.org/issue16060. - class BadInt(trunc_result_base): - def __int__(self): - return 42.0 - - class TruncReturnsBadInt(base): - def __trunc__(self): - return BadInt() - - with self.assertRaises(TypeError): - int(TruncReturnsBadInt()) - - #################################################################### - # future-specific tests are below: - #################################################################### - - # Exception messages in Py2 are 8-bit strings. The following fails, - # even if the testlist strings are wrapped in str() calls... 
- @unittest.expectedFailure - def test_error_message(self): - testlist = ('\xbd', '123\xbd', ' 123 456 ') - for s in testlist: - try: - int(s) - except ValueError as e: - self.assertIn(s.strip(), e.args[0]) - else: - self.fail("Expected int(%r) to raise a ValueError", s) - - def test_bytes_mul(self): - self.assertEqual(b'\x00' * int(5), b'\x00' * 5) - self.assertEqual(bytes(b'\x00') * int(5), bytes(b'\x00') * 5) - - def test_str_mul(self): - self.assertEqual(u'\x00' * int(5), u'\x00' * 5) - self.assertEqual(str(u'\x00') * int(5), str(u'\x00') * 5) - - def test_int_bytes(self): - self.assertEqual(int(b'a\r\n', 16), 10) - self.assertEqual(int(bytes(b'a\r\n'), 16), 10) - - def test_divmod(self): - """ - Test int.__divmod__ - """ - vals = [10**i for i in range(0, 20)] - for i in range(200): - x = random.choice(vals) - y = random.choice(vals) - assert divmod(int(x), int(y)) == divmod(x, y) - assert divmod(int(-x), int(y)) == divmod(-x, y) - assert divmod(int(x), int(-y)) == divmod(x, -y) - assert divmod(int(-x), int(-y)) == divmod(-x, -y) - - -if __name__ == "__main__": - unittest.main() diff --git a/future/tests/test_standard_library.py b/future/tests/test_standard_library.py deleted file mode 100644 index 4b30ee71..00000000 --- a/future/tests/test_standard_library.py +++ /dev/null @@ -1,303 +0,0 @@ -""" -Tests for the future.standard_library module -""" - -from __future__ import absolute_import, unicode_literals, print_function -from future import standard_library -from future import utils -from future.tests.base import unittest - -import sys -import tempfile -import os -import copy - -from future.standard_library import RENAMES, REPLACED_MODULES -from future.tests.base import CodeHandler - - -class TestStandardLibraryRenames(CodeHandler): - - def setUp(self): - self.interpreter = 'python' - self.tempdir = tempfile.mkdtemp() + os.path.sep - - @unittest.skipIf(utils.PY3, 'generic import tests are for Py2 only') - def test_all(self): - """ - Tests whether all of the old 
imports in RENAMES are accessible - under their new names. - """ - for (oldname, newname) in RENAMES.items(): - if newname == 'winreg' and sys.platform not in ['win32', 'win64']: - continue - if newname in REPLACED_MODULES: - # Skip this check for e.g. the stdlib's ``test`` module, - # which we have replaced completely. - continue - oldmod = __import__(oldname) - newmod = __import__(newname) - if '.' not in oldname: - self.assertEqual(oldmod, newmod) - - def test_suspend_hooks(self): - """ - Code like the try/except block here appears in Pyflakes v0.6.1. This - method tests whether suspend_hooks() works as advertised. - """ - example_PY2_check = False - with standard_library.suspend_hooks(): - # An example of code that we don't want to break: - try: - import builtins # fragile check for Python 3.x - except ImportError: - example_PY2_check = True - if utils.PY2: - self.assertTrue(example_PY2_check) - else: - self.assertFalse(example_PY2_check) - # The import should succeed again now: - import builtins - - def test_remove_hooks(self): - example_PY2_check = False - - standard_library.install_hooks() - old_meta_path = copy.copy(sys.meta_path) - import builtins - - standard_library.remove_hooks() - self.assertTrue(len(old_meta_path) == len(sys.meta_path) + 1) - - # An example of fragile import code that we don't want to break: - try: - import builtins - except ImportError: - example_PY2_check = True - if utils.PY2: - self.assertTrue(example_PY2_check) - else: - self.assertFalse(example_PY2_check) - standard_library.install_hooks() - # The import should succeed again now: - import builtins - self.assertTrue(len(old_meta_path) == len(sys.meta_path)) - - def test_remove_hooks2(self): - """ - This verifies that modules like http.client are no longer accessible after - disabling import hooks, even if they have been previously imported. - - The reason for this test is that Python caches imported modules in sys.modules. - """ - standard_library.remove_hooks() - try: - from . 
import verify_remove_hooks_affects_imported_modules - except RuntimeError as e: - self.fail(e.message) - finally: - standard_library.install_hooks() - - def test_requests(self): - """ - GitHub issue #19: conflict with ``requests`` - """ - # The below should succeed while ``requests`` is installed: - from . import verify_requests_is_not_broken - - @unittest.skipIf(utils.PY3, 'not testing for old urllib on Py3') - def test_old_urllib_import(self): - """ - Tests whether an imported module can import the old urllib package. - Importing future.standard_library in a script should be possible and - not disrupt any uses of the old Py2 standard library names in modules - imported by that script. - """ - code1 = ''' - from future import standard_library - import module_importing_old_urllib - ''' - self._write_test_script(code1, 'runme.py') - code2 = ''' - import urllib - assert 'urlopen' in dir(urllib) - print('Import succeeded!') - ''' - self._write_test_script(code2, 'module_importing_old_urllib.py') - output = self._run_test_script('runme.py') - print(output) - self.assertTrue(True) - - def test_sys_intern(self): - """ - Py2's builtin intern() has been moved to the sys module. Tests - whether sys.intern is available. - """ - from sys import intern - if utils.PY3: - self.assertEqual(intern('hello'), 'hello') - else: - # intern() requires byte-strings on Py2: - self.assertEqual(intern(b'hello'), b'hello') - - def test_sys_maxsize(self): - """ - Tests whether sys.maxsize is available. - """ - from sys import maxsize - print(maxsize) - self.assertTrue(maxsize > 0) - - def test_itertools_filterfalse(self): - """ - Tests whether itertools.filterfalse is available. - """ - from itertools import filterfalse - not_div_by_3 = filterfalse(lambda x: x % 3 == 0, range(8)) - self.assertEqual(list(not_div_by_3), [1, 2, 4, 5, 7]) - - def test_itertools_zip_longest(self): - """ - Tests whether itertools.zip_longest is available. 
- """ - from itertools import zip_longest - a = (1, 2) - b = [2, 4, 6] - self.assertEqual(list(zip_longest(a, b)), - [(1, 2), (2, 4), (None, 6)]) - - # def test_import_from_module(self): - # """ - # Tests whether e.g. "import socketserver" succeeds in a module - # imported by another module. We do not want it to! - # """ - # code1 = ''' - # from future import standard_library - # import importme2 - # ''' - # code2 = ''' - # import socketserver - # print('Import succeeded!') - # ''' - # self._write_test_script(code1, 'importme1.py') - # self._write_test_script(code2, 'importme2.py') - # output = self._run_test_script('importme1.py') - # print(output) - - def test_configparser(self): - import configparser - - def test_copyreg(self): - import copyreg - - def test_pickle(self): - import pickle - - def test_profile(self): - import profile - - def test_stringio(self): - from io import StringIO - s = StringIO('test') - for method in ['tell', 'read', 'seek', 'close', 'flush']: - self.assertTrue(hasattr(s, method)) - - def test_bytesio(self): - from io import BytesIO - s = BytesIO(b'test') - for method in ['tell', 'read', 'seek', 'close', 'flush', 'getvalue']: - self.assertTrue(hasattr(s, method)) - - def test_queue(self): - import queue - q = queue.Queue() - q.put('thing') - self.assertFalse(q.empty()) - - def test_reprlib(self): - import reprlib - - def test_socketserver(self): - import socketserver - - @unittest.skip("Not testing tkinter import (it may be installed separately from Python)") - def test_tkinter(self): - import tkinter - - def test_builtins(self): - import builtins - self.assertTrue(hasattr(builtins, 'tuple')) - - @unittest.skip("skipping in case there's no net connection") - def test_urllib_request(self): - import urllib.request - from pprint import pprint - URL = 'http://pypi.python.org/pypi/{0}/json' - package = 'future' - r = urllib.request.urlopen(URL.format(package)) - # pprint(r.read().decode('utf-8')) - - def test_html_import(self): - import html - 
import html.entities - import html.parser - - def test_http_client_import(self): - import http.client - self.assertTrue(True) - - @unittest.expectedFailure - def test_http_imports(self): - import http - import http.server - import http.cookies - import http.cookiejar - - @unittest.expectedFailure - def test_urllib_imports(self): - import urllib - import urllib.parse - import urllib.request - import urllib.robotparser - import urllib.error - import urllib.response - self.assertTrue(True) - - @unittest.expectedFailure - def test_urllib_parse(self): - import urllib.parse - URL = 'http://pypi.python.org/test_url/spaces oh no/' - self.assertEqual(urllib.parse.quote(URL.format(package)), 'http%3A//pypi.python.org/test_url/spaces%20oh%20no/') - - def test_underscore_prefixed_modules(self): - import _thread - import _dummy_thread - import _markupbase - self.assertTrue(True) - - def test_reduce(self): - """ - reduce has been moved to the functools module - """ - import functools - self.assertEqual(functools.reduce(lambda x, y: x+y, range(1, 6)), 15) - - def test_collections_userstuff(self): - """ - UserDict, UserList, and UserString have been moved to the - collections module. 
- """ - from collections import UserDict - from collections import UserList - from collections import UserString - - def test_reload(self): - """ - reload has been moved to the imp module - """ - import imp - imp.reload(imp) - self.assertTrue(True) - -if __name__ == '__main__': - unittest.main() diff --git a/future/tests/test_super.py b/future/tests/test_super.py deleted file mode 100644 index d8959f3a..00000000 --- a/future/tests/test_super.py +++ /dev/null @@ -1,10 +0,0 @@ -''' -Tests for the new super() function syntax -''' -from __future__ import absolute_import, print_function -from future.builtins.backports import super -from future.tests.base import unittest - - -if __name__ == '__main__': - unittest.main() diff --git a/future/tests/test_utils.py b/future/tests/test_utils.py deleted file mode 100644 index 37fceb3c..00000000 --- a/future/tests/test_utils.py +++ /dev/null @@ -1,138 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Tests for the various utility functions and classes in ``future.utils`` -""" - -from __future__ import absolute_import, unicode_literals, print_function -import sys -from future.builtins import * -from future.utils import (old_div, istext, isbytes, native, PY2, PY3, - native_str, raise_) - - -from numbers import Integral -from future.tests.base import unittest - -TEST_UNICODE_STR = u'ℝεα∂@ßʟ℮ ☂ℯṧт υηḯ¢☺ḓ℮' - - -class TestUtils(unittest.TestCase): - def setUp(self): - self.s = TEST_UNICODE_STR - self.s2 = str(self.s) - self.b = b'ABCDEFG' - self.b2 = bytes(self.b) - - def test_old_div(self): - """ - Tests whether old_div(a, b) is always equal to Python 2's a / b. 
- """ - self.assertEqual(old_div(1, 2), 0) - self.assertEqual(old_div(2, 2), 1) - self.assertTrue(isinstance(old_div(2, 2), int)) - - self.assertEqual(old_div(3, 2), 1) - self.assertTrue(isinstance(old_div(3, 2), int)) - - self.assertEqual(old_div(3., 2), 1.5) - self.assertTrue(not isinstance(old_div(3., 2), int)) - - self.assertEqual(old_div(-1, 2.), -0.5) - self.assertTrue(not isinstance(old_div(-1, 2.), int)) - - with self.assertRaises(ZeroDivisionError): - old_div(0, 0) - with self.assertRaises(ZeroDivisionError): - old_div(1, 0) - - def test_native_str(self): - """ - Tests whether native_str is really equal to the platform str. - """ - if PY2: - import __builtin__ - builtin_str = __builtin__.str - else: - import builtins - builtin_str = builtins.str - - inputs = [b'blah', u'blah', 'blah'] - for s in inputs: - self.assertEqual(native_str(s), builtin_str(s)) - self.assertTrue(isinstance(native_str(s), builtin_str)) - - def test_native(self): - a = int(10**20) # long int - b = native(a) - self.assertEqual(a, b) - if PY2: - self.assertEqual(type(b), long) - else: - self.assertEqual(type(b), int) - - c = bytes(b'ABC') - d = native(c) - self.assertEqual(c, d) - if PY2: - self.assertEqual(type(d), type(b'Py2 byte-string')) - else: - self.assertEqual(type(d), bytes) - - s = str(u'ABC') - t = native(s) - self.assertEqual(s, t) - if PY2: - self.assertEqual(type(t), unicode) - else: - self.assertEqual(type(t), str) - type(s) - - def test_istext(self): - self.assertTrue(istext(self.s)) - self.assertTrue(istext(self.s2)) - self.assertFalse(istext(self.b)) - self.assertFalse(istext(self.b2)) - - def test_isbytes(self): - self.assertTrue(isbytes(self.b)) - self.assertTrue(isbytes(self.b2)) - self.assertFalse(isbytes(self.s)) - self.assertFalse(isbytes(self.s2)) - - def test_raise_(self): - def valerror(): - try: - raise ValueError("Apples!") - except Exception as e: - raise_(e) - - self.assertRaises(ValueError, valerror) - - def with_value(): - raise_(IOError, "This is an 
error") - - self.assertRaises(IOError, with_value) - - try: - with_value() - except IOError as e: - self.assertEqual(str(e), "This is an error") - - def with_traceback(): - try: - raise ValueError("An error") - except Exception as e: - _, _, traceback = sys.exc_info() - raise_(IOError, str(e), traceback) - - self.assertRaises(IOError, with_traceback) - - try: - with_traceback() - except IOError as e: - self.assertEqual(str(e), "An error") - - - -if __name__ == '__main__': - unittest.main() diff --git a/future/utils/__init__.py b/future/utils/__init__.py deleted file mode 100644 index 1e7e0b68..00000000 --- a/future/utils/__init__.py +++ /dev/null @@ -1,534 +0,0 @@ -""" -A selection of cross-compatible functions for Python 2 and 3. - -These come from several sources: -* Jinja2 (BSD licensed: see https://github.com/mitsuhiko/jinja2/blob/master/LICENSE -* Pandas compatibility module pandas.compat -* six.py by Benjamin Peterson -* Django - -This exports useful functions for 2/3 compatible code that are not -builtins on Python 3: -* bind_method: binds functions to classes -* ``native_str_to_bytes`` and ``bytes_to_native_str`` -* ``native_str``: always equal to the native platform string object (because - this may be shadowed by imports from future.builtins) -* lists: lrange(), lmap(), lzip(), lfilter() -* iterable method compatibility: iteritems, iterkeys, itervalues - * Uses the original method if available, otherwise uses items, keys, values. -* types: - * text_type: unicode in Python 2, str in Python 3 - * binary_type: str in Python 2, bythes in Python 3 - * string_types: basestring in Python 2, str in Python 3 - -* bchr(c): - Take an integer and make a 1-character byte string -* bord(c) - Take the result of indexing on a byte string and make an integer -* tobytes(s) - Take a text string, a byte string, or a sequence of characters taken - from a byte string, and make a byte string. 
- -This module also defines a simple decorator called -``python_2_unicode_compatible`` (from django.utils.encoding) which -defines ``__unicode__`` and ``__str__`` methods consistently under Python -3 and 2. To support Python 3 and 2 with a single code base, simply define -a ``__str__`` method returning unicode text and apply the -python_2_unicode_compatible decorator to the class like this:: - - >>> from future.utils import python_2_unicode_compatible - - >>> @python_2_unicode_compatible - ... class MyClass(object): - ... def __str__(self): - ... return u'Unicode string: \u5b54\u5b50' - - >>> a = MyClass() - -Then, after this import: - - >>> from future.builtins import str - -the following is ``True`` on both Python 3 and 2:: - - >>> str(a) == a.encode('utf-8').decode('utf-8') - True - -and, on a Unicode-enabled terminal with the right fonts, these both print the -Chinese characters for Confucius:: - - print(a) - print(str(a)) - -On Python 3, this decorator is a no-op. - -""" - -import types -import sys -import numbers - -PY3 = sys.version_info[0] == 3 -PY2 = sys.version_info[0] == 2 -PYPY = hasattr(sys, 'pypy_translation_info') - - -def python_2_unicode_compatible(cls): - """ - A decorator that defines __unicode__ and __str__ methods under Python - 2. Under Python 3 it does nothing. - - To support Python 2 and 3 with a single code base, define a __str__ - method returning unicode text and apply this decorator to the class. - - The implementation comes from django.utils.encoding. - """ - if not PY3: - cls.__unicode__ = cls.__str__ - cls.__str__ = lambda self: self.__unicode__().encode('utf-8') - return cls - - -def with_metaclass(meta, *bases): - """ - Function from jinja2/_compat.py. License: BSD. 
- - Use it like this:: - - class BaseForm(object): - pass - - class FormType(type): - pass - - class Form(with_metaclass(FormType, BaseForm)): - pass - - This requires a bit of explanation: the basic idea is to make a - dummy metaclass for one level of class instantiation that replaces - itself with the actual metaclass. Because of internal type checks - we also need to make sure that we downgrade the custom metaclass - for one level to something closer to type (that's why __call__ and - __init__ comes back from type etc.). - - This has the advantage over six.with_metaclass of not introducing - dummy classes into the final MRO. - """ - class metaclass(meta): - __call__ = type.__call__ - __init__ = type.__init__ - def __new__(cls, name, this_bases, d): - if this_bases is None: - return type.__new__(cls, name, (), d) - return meta(name, bases, d) - return metaclass('temporary_class', None, {}) - - -# Definitions from pandas.compat follow: -if PY3: - def bchr(s): - return bytes([s]) - def bstr(s): - if isinstance(s, str): - return bytes(s, 'latin-1') - else: - return bytes(s) - def bord(s): - return s -else: - # Python 2 - def bchr(s): - return chr(s) - def bstr(s): - return str(s) - def bord(s): - return ord(s) - -### - -if PY3: - def tobytes(s): - if isinstance(s, bytes): - return s - else: - if isinstance(s, str): - return s.encode('latin-1') - else: - return bytes(s) -else: - # Python 2 - def tobytes(s): - ''' - Encodes to latin-1 (where the first 256 chars are the same as - ASCII.) 
- ''' - if isinstance(s, unicode): - return s.encode('latin-1') - else: - return ''.join(s) - -if PY3: - def native_str_to_bytes(s, encoding='ascii'): - return s.encode(encoding) - - def bytes_to_native_str(b, encoding='ascii'): - return b.decode(encoding) -else: - # Python 2 - def native_str_to_bytes(s, encoding='ascii'): - return s - - def bytes_to_native_str(b, encoding='ascii'): - return b - - -if PY3: - # list-producing versions of the major Python iterating functions - def lrange(*args, **kwargs): - return list(range(*args, **kwargs)) - - def lzip(*args, **kwargs): - return list(zip(*args, **kwargs)) - - def lmap(*args, **kwargs): - return list(map(*args, **kwargs)) - - def lfilter(*args, **kwargs): - return list(filter(*args, **kwargs)) -else: - import __builtin__ - # Python 2-builtin ranges produce lists - lrange = __builtin__.range - lzip = __builtin__.zip - lmap = __builtin__.map - lfilter = __builtin__.filter - - -def isidentifier(s, dotted=False): - ''' - A function equivalent to the str.isidentifier method on Py3 - ''' - if dotted: - return all(isidentifier(a) for a in s.split('.')) - if PY3: - return s.isidentifier() - else: - import re - _name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*$") - return bool(_name_re.match(s)) - - -def viewitems(obj, **kwargs): - """ - Function for iterating over dictionary items with the same set-like - behaviour on Py2.7 as on Py3. - - Passes kwargs to method.""" - func = getattr(obj, "viewitems", None) - if not func: - func = obj.items - return func(**kwargs) - - -def viewkeys(obj, **kwargs): - """ - Function for iterating over dictionary keys with the same set-like - behaviour on Py2.7 as on Py3. - - Passes kwargs to method.""" - func = getattr(obj, "viewkeys", None) - if not func: - func = obj.keys - return func(**kwargs) - - -def viewvalues(obj, **kwargs): - """ - Function for iterating over dictionary values with the same set-like - behaviour on Py2.7 as on Py3. 
- - Passes kwargs to method.""" - func = getattr(obj, "viewvalues", None) - if not func: - func = obj.values - return func(**kwargs) - - -def iteritems(obj, **kwargs): - """Use this only if compatibility with Python versions before 2.7 is - required. Otherwise, prefer viewitems(). - """ - func = getattr(obj, "iteritems", None) - if not func: - func = obj.items - return func(**kwargs) - - -def iterkeys(obj, **kwargs): - """Use this only if compatibility with Python versions before 2.7 is - required. Otherwise, prefer viewkeys(). - """ - func = getattr(obj, "iterkeys", None) - if not func: - func = obj.keys - return func(**kwargs) - - -def itervalues(obj, **kwargs): - """Use this only if compatibility with Python versions before 2.7 is - required. Otherwise, prefer viewvalues(). - """ - func = getattr(obj, "itervalues", None) - if not func: - func = obj.values - return func(**kwargs) - - -def bind_method(cls, name, func): - """Bind a method to class, python 2 and python 3 compatible. - - Parameters - ---------- - - cls : type - class to receive bound method - name : basestring - name of method on class instance - func : function - function to be bound as method - - Returns - ------- - None - """ - # only python 2 has bound/unbound method issue - if not PY3: - setattr(cls, name, types.MethodType(func, None, cls)) - else: - setattr(cls, name, func) - - -def getexception(): - return sys.exc_info()[1] - - -if PY3: - def raise_(tp, value=None, tb=None): - """ - A function that matches the Python 2.x ``raise`` statement. This - allows re-raising exceptions with the cls value and traceback on - Python 2 and 3. 
- """ - if value is not None and isinstance(tp, Exception): - raise TypeError("instance exception may not have a separate value") - if value is not None: - exc = tp(value) - else: - exc = tp - if exc.__traceback__ is not tb: - raise exc.with_traceback(tb) - - def raise_with_traceback(exc, traceback=Ellipsis): - if traceback == Ellipsis: - _, _, traceback = sys.exc_info() - raise exc.with_traceback(traceback) - -else: - exec(''' -def raise_(tp, value=None, tb=None): - raise tp, value, tb - -def raise_with_traceback(exc, traceback=Ellipsis): - if traceback == Ellipsis: - _, _, traceback = sys.exc_info() - raise exc, None, traceback -'''.strip()) - - -raise_with_traceback.__doc__ = ( -"""Raise exception with existing traceback. -If traceback is not passed, uses sys.exc_info() to get traceback.""" -) - - -# Deprecated alias for backward compatibility with ``future`` versions < 0.11: -reraise = raise_ - - -def implements_iterator(cls): - ''' - From jinja2/_compat.py. License: BSD. - - Use as a decorator like this:: - - @implements_iterator - class UppercasingIterator(object): - def __init__(self, iterable): - self._iter = iter(iterable) - def __iter__(self): - return self - def __next__(self): - return next(self._iter).upper() - - ''' - if PY3: - return cls - else: - cls.next = cls.__next__ - del cls.__next__ - return cls - -if PY3: - get_next = lambda x: x.next -else: - get_next = lambda x: x.__next__ - - -def encode_filename(filename): - if PY3: - return filename - else: - if isinstance(filename, unicode): - return filename.encode('utf-8') - return filename - - -def is_new_style(cls): - """ - Python 2.7 has both new-style and old-style classes. Old-style classes can - be pesky in some circumstances, such as when using inheritance. Use this - function to test for whether a class is new-style. (Python 3 only has - new-style classes.) 
- """ - return hasattr(cls, '__class__') and ('__dict__' in dir(cls) - or hasattr(cls, '__slots__')) - -# The native platform string and bytes types. Useful because ``str`` and -# ``bytes`` are redefined on Py2 by ``from future.builtins import *``. -native_str = str -native_bytes = bytes - - -def istext(obj): - """ - Deprecated. Use:: - >>> isinstance(obj, str) - after this import: - >>> from future.builtins import str - """ - return isinstance(obj, type(u'')) - - -def isbytes(obj): - """ - Deprecated. Use:: - >>> isinstance(obj, bytes) - after this import: - >>> from future.builtins import bytes - """ - return isinstance(obj, type(b'')) - - -def isnewbytes(obj): - """ - Equivalent to the result of ``isinstance(obj, newbytes)`` were - ``__instancecheck__`` not overridden on the newbytes subclass. In - other words, it is REALLY a newbytes instance, not a Py2 native str - object? - """ - # TODO: generalize this so that it works with subclasses of newbytes - # Import is here to avoid circular imports: - from future.builtins.backports.newbytes import newbytes - return type(obj) == newbytes - - -def isint(obj): - """ - Deprecated. Tests whether an object is a Py3 ``int`` or either a Py2 ``int`` or - ``long``. 
- - Instead of using this function, you can use: - - >>> from future.builtins import int - >>> isinstance(obj, int) - - The following idiom is equivalent: - - >>> from numbers import Integral - >>> isinstance(obj, Integral) - """ - - return isinstance(obj, numbers.Integral) - - -def native(obj): - """ - On Py3, this is a no-op: native(obj) -> obj - - On Py2, returns the corresponding native Py2 types that are - superclasses for backported objects from Py3: - - >>> from future.builtins import str, bytes, int - - >>> native(str(u'ABC')) - u'ABC' - >>> type(native(str(u'ABC'))) - unicode - - >>> native(bytes(b'ABC')) - b'ABC' - >>> type(native(bytes(b'ABC'))) - bytes - - >>> native(int(10**20)) - 100000000000000000000L - >>> type(native(int(10**20))) - long - - Existing native types on Py2 will be returned unchanged: - - >>> type(native(u'ABC')) - unicode - """ - if hasattr(obj, '__native__'): - return obj.__native__() - else: - return obj - - -# Implementation of exec_ is from ``six``: -if PY3: - import builtins - exec_ = getattr(builtins, "exec") -else: - def exec_(code, globs=None, locs=None): - """Execute code in a namespace.""" - if globs is None: - frame = sys._getframe(1) - globs = frame.f_globals - if locs is None: - locs = frame.f_locals - del frame - elif locs is None: - locs = globs - exec("""exec code in globs, locs""") - - -def old_div(a, b): - """ - Equivalent to ``a / b`` on Python 2 without ``from __future__ import - division``. 
- """ - return a // b if (isint(a) and isint(b)) else a / b - - -__all__ = ['PY3', 'PY2', 'PYPY', 'python_2_unicode_compatible', - 'with_metaclass', 'bchr', 'bstr', 'bord', - 'tobytes', 'str_to_native_bytes', 'bytes_to_native_str', - 'lrange', 'lmap', 'lzip', 'lfilter', - 'isidentifier', 'iteritems', 'iterkeys', 'itervalues', - 'viewitems', 'viewkeys', 'viewvalues', - 'bind_method', 'getexception', - 'reraise', 'implements_iterator', 'get_next', 'encode_filename', - 'is_new_style', 'native_str'] - diff --git a/future/utils/encoding.py b/future/utils/encoding.py deleted file mode 100644 index 3dee86e0..00000000 --- a/future/utils/encoding.py +++ /dev/null @@ -1,211 +0,0 @@ -""" -Various small encoding utils from django.utils.encoding. This has a -BSD-based license; see - https://github.com/django/django/blob/master/LICENSE -""" -from __future__ import unicode_literals - -import codecs -import datetime -from decimal import Decimal -import locale -try: - from urllib.parse import quote -except ImportError: # Python 2 - from urllib import quote - -from future.utils import six - -def python_2_unicode_compatible(klass): - """ - A decorator that defines __unicode__ and __str__ methods under Python 2. - Under Python 3 it does nothing. - - To support Python 2 and 3 with a single code base, define a __str__ method - returning text and apply this decorator to the class. - """ - if not six.PY3: - klass.__unicode__ = klass.__str__ - klass.__str__ = lambda self: self.__unicode__().encode('utf-8') - return klass - -def smart_text(s, encoding='utf-8', strings_only=False, errors='strict'): - """ - Returns a text object representing 's' -- unicode on Python 2 and str on - Python 3. Treats bytestrings using the 'encoding' codec. - - If strings_only is True, don't convert (some) non-string-like objects. - """ - return force_text(s, encoding, strings_only, errors) - -def is_protected_type(obj): - """Determine if the object instance is of a protected type. 
- - Objects of protected types are preserved as-is when passed to - force_text(strings_only=True). - """ - return isinstance(obj, six.integer_types + (type(None), float, Decimal, - datetime.datetime, datetime.date, datetime.time)) - -def force_text(s, encoding='utf-8', strings_only=False, errors='strict'): - """ - Similar to smart_text, except that lazy instances are resolved to - strings, rather than kept as lazy objects. - - If strings_only is True, don't convert (some) non-string-like objects. - """ - # Handle the common case first, saves 30-40% when s is an instance of - # six.text_type. This function gets called often in that setting. - if isinstance(s, six.text_type): - return s - if strings_only and is_protected_type(s): - return s - try: - if not isinstance(s, six.string_types): - if hasattr(s, '__unicode__'): - s = s.__unicode__() - else: - if six.PY3: - if isinstance(s, bytes): - s = six.text_type(s, encoding, errors) - else: - s = six.text_type(s) - else: - s = six.text_type(bytes(s), encoding, errors) - else: - # Note: We use .decode() here, instead of six.text_type(s, encoding, - # errors), so that if s is a SafeBytes, it ends up being a - # SafeText at the end. - s = s.decode(encoding, errors) - except UnicodeDecodeError as e: - # If we get to here, the caller has passed in an Exception - # subclass populated with non-ASCII bytestring data without a - # working unicode method. Try to handle this without raising a - # further exception by individually forcing the exception args - # to unicode. - s = ' '.join([force_text(arg, encoding, strings_only, - errors) for arg in s]) - return s - -def smart_bytes(s, encoding='utf-8', strings_only=False, errors='strict'): - """ - Returns a bytestring version of 's', encoded as specified in 'encoding'. - - If strings_only is True, don't convert (some) non-string-like objects. 
- """ - return force_bytes(s, encoding, strings_only, errors) - - -def force_bytes(s, encoding='utf-8', strings_only=False, errors='strict'): - """ - Similar to smart_bytes, except that lazy instances are resolved to - strings, rather than kept as lazy objects. - - If strings_only is True, don't convert (some) non-string-like objects. - """ - if isinstance(s, six.memoryview): - s = bytes(s) - if isinstance(s, bytes): - if encoding == 'utf-8': - return s - else: - return s.decode('utf-8', errors).encode(encoding, errors) - if strings_only and (s is None or isinstance(s, int)): - return s - if not isinstance(s, six.string_types): - try: - if six.PY3: - return six.text_type(s).encode(encoding) - else: - return bytes(s) - except UnicodeEncodeError: - if isinstance(s, Exception): - # An Exception subclass containing non-ASCII data that doesn't - # know how to print itself properly. We shouldn't raise a - # further exception. - return b' '.join([force_bytes(arg, encoding, strings_only, - errors) for arg in s]) - return six.text_type(s).encode(encoding, errors) - else: - return s.encode(encoding, errors) - -if six.PY3: - smart_str = smart_text - force_str = force_text -else: - smart_str = smart_bytes - force_str = force_bytes - # backwards compatibility for Python 2 - smart_unicode = smart_text - force_unicode = force_text - -smart_str.__doc__ = """\ -Apply smart_text in Python 3 and smart_bytes in Python 2. - -This is suitable for writing to sys.stdout (for instance). -""" - -force_str.__doc__ = """\ -Apply force_text in Python 3 and force_bytes in Python 2. -""" - -def iri_to_uri(iri): - """ - Convert an Internationalized Resource Identifier (IRI) portion to a URI - portion that is suitable for inclusion in a URL. - - This is the algorithm from section 3.1 of RFC 3987. However, since we are - assuming input is either UTF-8 or unicode already, we can simplify things a - little from the full method. - - Returns an ASCII string containing the encoded result. 
- """ - # The list of safe characters here is constructed from the "reserved" and - # "unreserved" characters specified in sections 2.2 and 2.3 of RFC 3986: - # reserved = gen-delims / sub-delims - # gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" - # sub-delims = "!" / "$" / "&" / "'" / "(" / ")" - # / "*" / "+" / "," / ";" / "=" - # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" - # Of the unreserved characters, urllib.quote already considers all but - # the ~ safe. - # The % character is also added to the list of safe characters here, as the - # end of section 3.1 of RFC 3987 specifically mentions that % must not be - # converted. - if iri is None: - return iri - return quote(force_bytes(iri), safe=b"/#%[]=:;$&()+,!?*@'~") - -def filepath_to_uri(path): - """Convert a file system path to a URI portion that is suitable for - inclusion in a URL. - - We are assuming input is either UTF-8 or unicode already. - - This method will encode certain chars that would normally be recognized as - special chars for URIs. Note that this method does not encode the ' - character, as it is a valid character within URIs. See - encodeURIComponent() JavaScript function for more details. - - Returns an ASCII string containing the encoded result. - """ - if path is None: - return path - # I know about `os.sep` and `os.altsep` but I want to leave - # some flexibility for hardcoding separators. - return quote(force_bytes(path).replace(b"\\", b"/"), safe=b"/~!*()'") - -def get_system_encoding(): - """ - The encoding of the default system locale but falls back to the given - fallback encoding if the encoding is unsupported by python or could - not be determined. 
See tickets #10335 and #5846 - """ - try: - encoding = locale.getdefaultlocale()[1] or 'ascii' - codecs.lookup(encoding) - except Exception: - encoding = 'ascii' - return encoding - -DEFAULT_LOCALE_ENCODING = get_system_encoding() diff --git a/future/utils/frompy2.py b/future/utils/frompy2.py deleted file mode 100644 index c76007fe..00000000 --- a/future/utils/frompy2.py +++ /dev/null @@ -1,70 +0,0 @@ -""" -A resurrection of some old functions from Python 2. These should be used -sparingly, to help with porting efforts, since code using them is no -longer standard Python 3 code. - -We provide these builtin functions which have no equivalent on Py3: - -- cmp() -- execfile() - -These aliases are also provided: - -- raw_input() <- input() -- unicode() <- str() -- unichr() <- chr() - -For reference, the following Py2 builtin functions are available from -these standard locations on both Py2.6+ and Py3: - -- reduce() <- functools.reduce() -- reload() <- imp.reload() - -""" - -from __future__ import unicode_literals - -from future.utils import PY3 - - -if PY3: - # Bring back the cmp function - cmp = lambda a, b: (a > b) - (a < b) - raw_input = input - unicode = str - unichr = chr -else: - cmp = __builtin__.cmp - raw_input = __builtin__.raw_input - unicode = __builtin__.unicode - unichr = __builtin__.unichr - - -def execfile(filename, myglobals=None, mylocals=None): - if PY3: - mylocals = mylocals if (mylocals is not None) else myglobals - exec_(compile(open(filename).read(), filename, 'exec'), - myglobals, mylocals) - else: - if sys.platform == 'win32': - # The rstrip() is necessary b/c trailing whitespace in - # files will cause an IndentationError in Python 2.6 - # (this was fixed in 2.7). See IPython issue 1027. - scripttext = __builtin__.open(filename).read().rstrip() + '\n' - # compile converts unicode filename to str assuming - # ascii. 
Let's do the conversion before calling compile - if isinstance(filename, unicode): - filename = filename.encode(unicode, 'replace') - # else: - # filename = filename - exec_(compile(scripttext, filename, 'exec') in glob, loc) - else: - if isinstance(filename, unicode): - filename = filename.encode(sys.getfilesystemencoding()) - else: - filename = filename - __builtin__.execfile(filename, myglobals=myglobals, - mylocals=mylocals) - - -__all__ = ['cmp', 'raw_input', 'unichr', 'unicode', 'execfile'] diff --git a/future/utils/six.py b/future/utils/six.py deleted file mode 100644 index d972faf3..00000000 --- a/future/utils/six.py +++ /dev/null @@ -1,582 +0,0 @@ -"""Utilities for writing code that runs on Python 2 and 3""" - -# Copyright (c) 2010-2013 Benjamin Peterson -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -import operator -import sys -import types - -__author__ = "Benjamin Peterson " -__version__ = "1.4.1" - - -# Useful for very coarse version differentiation. -PY2 = sys.version_info[0] == 2 -PY3 = sys.version_info[0] == 3 - -if PY3: - string_types = str, - integer_types = int, - class_types = type, - text_type = str - binary_type = bytes - - MAXSIZE = sys.maxsize -else: - string_types = basestring, - integer_types = (int, long) - class_types = (type, types.ClassType) - text_type = unicode - binary_type = str - - if sys.platform.startswith("java"): - # Jython always uses 32 bits. - MAXSIZE = int((1 << 31) - 1) - else: - # It's possible to have sizeof(long) != sizeof(Py_ssize_t). - class X(object): - def __len__(self): - return 1 << 31 - try: - len(X()) - except OverflowError: - # 32-bit - MAXSIZE = int((1 << 31) - 1) - else: - # 64-bit - MAXSIZE = int((1 << 63) - 1) - del X - - -def _add_doc(func, doc): - """Add documentation to a function.""" - func.__doc__ = doc - - -def _import_module(name): - """Import module, returning the module after the last dot.""" - __import__(name) - return sys.modules[name] - - -class _LazyDescr(object): - - def __init__(self, name): - self.name = name - - def __get__(self, obj, tp): - result = self._resolve() - setattr(obj, self.name, result) - # This is a bit ugly, but it avoids running this again. 
- delattr(tp, self.name) - return result - - -class MovedModule(_LazyDescr): - - def __init__(self, name, old, new=None): - super(MovedModule, self).__init__(name) - if PY3: - if new is None: - new = name - self.mod = new - else: - self.mod = old - - def _resolve(self): - return _import_module(self.mod) - - -class MovedAttribute(_LazyDescr): - - def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None): - super(MovedAttribute, self).__init__(name) - if PY3: - if new_mod is None: - new_mod = name - self.mod = new_mod - if new_attr is None: - if old_attr is None: - new_attr = name - else: - new_attr = old_attr - self.attr = new_attr - else: - self.mod = old_mod - if old_attr is None: - old_attr = name - self.attr = old_attr - - def _resolve(self): - module = _import_module(self.mod) - return getattr(module, self.attr) - - - -class _MovedItems(types.ModuleType): - """Lazy loading of moved objects""" - - -_moved_attributes = [ - MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"), - MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"), - MovedAttribute("filterfalse", "itertools", "itertools", "ifilterfalse", "filterfalse"), - MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"), - MovedAttribute("map", "itertools", "builtins", "imap", "map"), - MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"), - MovedAttribute("reload_module", "__builtin__", "imp", "reload"), - MovedAttribute("reduce", "__builtin__", "functools"), - MovedAttribute("StringIO", "StringIO", "io"), - MovedAttribute("UserString", "UserString", "collections"), - MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"), - MovedAttribute("zip", "itertools", "builtins", "izip", "zip"), - MovedAttribute("zip_longest", "itertools", "itertools", "izip_longest", "zip_longest"), - - MovedModule("builtins", "__builtin__"), - MovedModule("configparser", "ConfigParser"), - MovedModule("copyreg", "copy_reg"), - 
MovedModule("http_cookiejar", "cookielib", "http.cookiejar"), - MovedModule("http_cookies", "Cookie", "http.cookies"), - MovedModule("html_entities", "htmlentitydefs", "html.entities"), - MovedModule("html_parser", "HTMLParser", "html.parser"), - MovedModule("http_client", "httplib", "http.client"), - MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"), - MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"), - MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"), - MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"), - MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"), - MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"), - MovedModule("cPickle", "cPickle", "pickle"), - MovedModule("queue", "Queue"), - MovedModule("reprlib", "repr"), - MovedModule("socketserver", "SocketServer"), - MovedModule("tkinter", "Tkinter"), - MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"), - MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"), - MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"), - MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"), - MovedModule("tkinter_tix", "Tix", "tkinter.tix"), - MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"), - MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"), - MovedModule("tkinter_colorchooser", "tkColorChooser", - "tkinter.colorchooser"), - MovedModule("tkinter_commondialog", "tkCommonDialog", - "tkinter.commondialog"), - MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"), - MovedModule("tkinter_font", "tkFont", "tkinter.font"), - MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"), - MovedModule("tkinter_tksimpledialog", "tkSimpleDialog", - "tkinter.simpledialog"), - MovedModule("urllib_parse", __name__ + ".moves.urllib_parse", "urllib.parse"), - MovedModule("urllib_error", __name__ + 
".moves.urllib_error", "urllib.error"), - MovedModule("urllib", __name__ + ".moves.urllib", __name__ + ".moves.urllib"), - MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"), - MovedModule("winreg", "_winreg"), -] -for attr in _moved_attributes: - setattr(_MovedItems, attr.name, attr) -del attr - -moves = sys.modules[__name__ + ".moves"] = _MovedItems(__name__ + ".moves") - - - -class Module_six_moves_urllib_parse(types.ModuleType): - """Lazy loading of moved objects in six.moves.urllib_parse""" - - -_urllib_parse_moved_attributes = [ - MovedAttribute("ParseResult", "urlparse", "urllib.parse"), - MovedAttribute("parse_qs", "urlparse", "urllib.parse"), - MovedAttribute("parse_qsl", "urlparse", "urllib.parse"), - MovedAttribute("urldefrag", "urlparse", "urllib.parse"), - MovedAttribute("urljoin", "urlparse", "urllib.parse"), - MovedAttribute("urlparse", "urlparse", "urllib.parse"), - MovedAttribute("urlsplit", "urlparse", "urllib.parse"), - MovedAttribute("urlunparse", "urlparse", "urllib.parse"), - MovedAttribute("urlunsplit", "urlparse", "urllib.parse"), - MovedAttribute("quote", "urllib", "urllib.parse"), - MovedAttribute("quote_plus", "urllib", "urllib.parse"), - MovedAttribute("unquote", "urllib", "urllib.parse"), - MovedAttribute("unquote_plus", "urllib", "urllib.parse"), - MovedAttribute("urlencode", "urllib", "urllib.parse"), -] -for attr in _urllib_parse_moved_attributes: - setattr(Module_six_moves_urllib_parse, attr.name, attr) -del attr - -sys.modules[__name__ + ".moves.urllib_parse"] = Module_six_moves_urllib_parse(__name__ + ".moves.urllib_parse") -sys.modules[__name__ + ".moves.urllib.parse"] = Module_six_moves_urllib_parse(__name__ + ".moves.urllib.parse") - - -class Module_six_moves_urllib_error(types.ModuleType): - """Lazy loading of moved objects in six.moves.urllib_error""" - - -_urllib_error_moved_attributes = [ - MovedAttribute("URLError", "urllib2", "urllib.error"), - MovedAttribute("HTTPError", "urllib2", "urllib.error"), - 
MovedAttribute("ContentTooShortError", "urllib", "urllib.error"), -] -for attr in _urllib_error_moved_attributes: - setattr(Module_six_moves_urllib_error, attr.name, attr) -del attr - -sys.modules[__name__ + ".moves.urllib_error"] = Module_six_moves_urllib_error(__name__ + ".moves.urllib_error") -sys.modules[__name__ + ".moves.urllib.error"] = Module_six_moves_urllib_error(__name__ + ".moves.urllib.error") - - -class Module_six_moves_urllib_request(types.ModuleType): - """Lazy loading of moved objects in six.moves.urllib_request""" - - -_urllib_request_moved_attributes = [ - MovedAttribute("urlopen", "urllib2", "urllib.request"), - MovedAttribute("install_opener", "urllib2", "urllib.request"), - MovedAttribute("build_opener", "urllib2", "urllib.request"), - MovedAttribute("pathname2url", "urllib", "urllib.request"), - MovedAttribute("url2pathname", "urllib", "urllib.request"), - MovedAttribute("getproxies", "urllib", "urllib.request"), - MovedAttribute("Request", "urllib2", "urllib.request"), - MovedAttribute("OpenerDirector", "urllib2", "urllib.request"), - MovedAttribute("HTTPDefaultErrorHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPRedirectHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPCookieProcessor", "urllib2", "urllib.request"), - MovedAttribute("ProxyHandler", "urllib2", "urllib.request"), - MovedAttribute("BaseHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPPasswordMgr", "urllib2", "urllib.request"), - MovedAttribute("HTTPPasswordMgrWithDefaultRealm", "urllib2", "urllib.request"), - MovedAttribute("AbstractBasicAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPBasicAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("ProxyBasicAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("AbstractDigestAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPDigestAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("ProxyDigestAuthHandler", "urllib2", "urllib.request"), 
- MovedAttribute("HTTPHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPSHandler", "urllib2", "urllib.request"), - MovedAttribute("FileHandler", "urllib2", "urllib.request"), - MovedAttribute("FTPHandler", "urllib2", "urllib.request"), - MovedAttribute("CacheFTPHandler", "urllib2", "urllib.request"), - MovedAttribute("UnknownHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPErrorProcessor", "urllib2", "urllib.request"), - MovedAttribute("urlretrieve", "urllib", "urllib.request"), - MovedAttribute("urlcleanup", "urllib", "urllib.request"), - MovedAttribute("URLopener", "urllib", "urllib.request"), - MovedAttribute("FancyURLopener", "urllib", "urllib.request"), -] -for attr in _urllib_request_moved_attributes: - setattr(Module_six_moves_urllib_request, attr.name, attr) -del attr - -sys.modules[__name__ + ".moves.urllib_request"] = Module_six_moves_urllib_request(__name__ + ".moves.urllib_request") -sys.modules[__name__ + ".moves.urllib.request"] = Module_six_moves_urllib_request(__name__ + ".moves.urllib.request") - - -class Module_six_moves_urllib_response(types.ModuleType): - """Lazy loading of moved objects in six.moves.urllib_response""" - - -_urllib_response_moved_attributes = [ - MovedAttribute("addbase", "urllib", "urllib.response"), - MovedAttribute("addclosehook", "urllib", "urllib.response"), - MovedAttribute("addinfo", "urllib", "urllib.response"), - MovedAttribute("addinfourl", "urllib", "urllib.response"), -] -for attr in _urllib_response_moved_attributes: - setattr(Module_six_moves_urllib_response, attr.name, attr) -del attr - -sys.modules[__name__ + ".moves.urllib_response"] = Module_six_moves_urllib_response(__name__ + ".moves.urllib_response") -sys.modules[__name__ + ".moves.urllib.response"] = Module_six_moves_urllib_response(__name__ + ".moves.urllib.response") - - -class Module_six_moves_urllib_robotparser(types.ModuleType): - """Lazy loading of moved objects in six.moves.urllib_robotparser""" - - 
-_urllib_robotparser_moved_attributes = [ - MovedAttribute("RobotFileParser", "robotparser", "urllib.robotparser"), -] -for attr in _urllib_robotparser_moved_attributes: - setattr(Module_six_moves_urllib_robotparser, attr.name, attr) -del attr - -sys.modules[__name__ + ".moves.urllib_robotparser"] = Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib_robotparser") -sys.modules[__name__ + ".moves.urllib.robotparser"] = Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib.robotparser") - - -class Module_six_moves_urllib(types.ModuleType): - """Create a six.moves.urllib namespace that resembles the Python 3 namespace""" - parse = sys.modules[__name__ + ".moves.urllib_parse"] - error = sys.modules[__name__ + ".moves.urllib_error"] - request = sys.modules[__name__ + ".moves.urllib_request"] - response = sys.modules[__name__ + ".moves.urllib_response"] - robotparser = sys.modules[__name__ + ".moves.urllib_robotparser"] - - -sys.modules[__name__ + ".moves.urllib"] = Module_six_moves_urllib(__name__ + ".moves.urllib") - - -def add_move(move): - """Add an item to six.moves.""" - setattr(_MovedItems, move.name, move) - - -def remove_move(name): - """Remove item from six.moves.""" - try: - delattr(_MovedItems, name) - except AttributeError: - try: - del moves.__dict__[name] - except KeyError: - raise AttributeError("no such move, %r" % (name,)) - - -if PY3: - _meth_func = "__func__" - _meth_self = "__self__" - - _func_closure = "__closure__" - _func_code = "__code__" - _func_defaults = "__defaults__" - _func_globals = "__globals__" - - _iterkeys = "keys" - _itervalues = "values" - _iteritems = "items" - _iterlists = "lists" -else: - _meth_func = "im_func" - _meth_self = "im_self" - - _func_closure = "func_closure" - _func_code = "func_code" - _func_defaults = "func_defaults" - _func_globals = "func_globals" - - _iterkeys = "iterkeys" - _itervalues = "itervalues" - _iteritems = "iteritems" - _iterlists = "iterlists" - - -try: - advance_iterator = next 
-except NameError: - def advance_iterator(it): - return it.next() -next = advance_iterator - - -try: - callable = callable -except NameError: - def callable(obj): - return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) - - -if PY3: - def get_unbound_function(unbound): - return unbound - - create_bound_method = types.MethodType - - Iterator = object -else: - def get_unbound_function(unbound): - return unbound.im_func - - def create_bound_method(func, obj): - return types.MethodType(func, obj, obj.__class__) - - class Iterator(object): - - def next(self): - return type(self).__next__(self) - - callable = callable -_add_doc(get_unbound_function, - """Get the function out of a possibly unbound function""") - - -get_method_function = operator.attrgetter(_meth_func) -get_method_self = operator.attrgetter(_meth_self) -get_function_closure = operator.attrgetter(_func_closure) -get_function_code = operator.attrgetter(_func_code) -get_function_defaults = operator.attrgetter(_func_defaults) -get_function_globals = operator.attrgetter(_func_globals) - - -def iterkeys(d, **kw): - """Return an iterator over the keys of a dictionary.""" - return iter(getattr(d, _iterkeys)(**kw)) - -def itervalues(d, **kw): - """Return an iterator over the values of a dictionary.""" - return iter(getattr(d, _itervalues)(**kw)) - -def iteritems(d, **kw): - """Return an iterator over the (key, value) pairs of a dictionary.""" - return iter(getattr(d, _iteritems)(**kw)) - -def iterlists(d, **kw): - """Return an iterator over the (key, [values]) pairs of a dictionary.""" - return iter(getattr(d, _iterlists)(**kw)) - - -if PY3: - def b(s): - return s.encode("latin-1") - def u(s): - return s - unichr = chr - if sys.version_info[1] <= 1: - def int2byte(i): - return bytes((i,)) - else: - # This is about 2x faster than the implementation above on 3.2+ - int2byte = operator.methodcaller("to_bytes", 1, "big") - byte2int = operator.itemgetter(0) - indexbytes = operator.getitem - iterbytes = 
iter - import io - StringIO = io.StringIO - BytesIO = io.BytesIO -else: - def b(s): - return s - def u(s): - return unicode(s, "unicode_escape") - unichr = unichr - int2byte = chr - def byte2int(bs): - return ord(bs[0]) - def indexbytes(buf, i): - return ord(buf[i]) - def iterbytes(buf): - return (ord(byte) for byte in buf) - import StringIO - StringIO = BytesIO = StringIO.StringIO -_add_doc(b, """Byte literal""") -_add_doc(u, """Text literal""") - - -if PY3: - exec_ = getattr(moves.builtins, "exec") - - - def reraise(tp, value, tb=None): - if value.__traceback__ is not tb: - raise value.with_traceback(tb) - raise value - -else: - def exec_(_code_, _globs_=None, _locs_=None): - """Execute code in a namespace.""" - if _globs_ is None: - frame = sys._getframe(1) - _globs_ = frame.f_globals - if _locs_ is None: - _locs_ = frame.f_locals - del frame - elif _locs_ is None: - _locs_ = _globs_ - exec("""exec _code_ in _globs_, _locs_""") - - - exec_("""def reraise(tp, value, tb=None): - raise tp, value, tb -""") - - -print_ = getattr(moves.builtins, "print", None) -if print_ is None: - def print_(*args, **kwargs): - """The new-style print function for Python 2.4 and 2.5.""" - fp = kwargs.pop("file", sys.stdout) - if fp is None: - return - def write(data): - if not isinstance(data, basestring): - data = str(data) - # If the file has an encoding, encode unicode with it. 
- if (isinstance(fp, file) and - isinstance(data, unicode) and - fp.encoding is not None): - errors = getattr(fp, "errors", None) - if errors is None: - errors = "strict" - data = data.encode(fp.encoding, errors) - fp.write(data) - want_unicode = False - sep = kwargs.pop("sep", None) - if sep is not None: - if isinstance(sep, unicode): - want_unicode = True - elif not isinstance(sep, str): - raise TypeError("sep must be None or a string") - end = kwargs.pop("end", None) - if end is not None: - if isinstance(end, unicode): - want_unicode = True - elif not isinstance(end, str): - raise TypeError("end must be None or a string") - if kwargs: - raise TypeError("invalid keyword arguments to print()") - if not want_unicode: - for arg in args: - if isinstance(arg, unicode): - want_unicode = True - break - if want_unicode: - newline = unicode("\n") - space = unicode(" ") - else: - newline = "\n" - space = " " - if sep is None: - sep = space - if end is None: - end = newline - for i, arg in enumerate(args): - if i: - write(sep) - write(arg) - write(end) - -_add_doc(reraise, """Reraise an exception.""") - - -def with_metaclass(meta, *bases): - """Create a base class with a metaclass.""" - return meta("NewBase", bases, {}) - -def add_metaclass(metaclass): - """Class decorator for creating a class with a metaclass.""" - def wrapper(cls): - orig_vars = cls.__dict__.copy() - orig_vars.pop('__dict__', None) - orig_vars.pop('__weakref__', None) - for slots_var in orig_vars.get('__slots__', ()): - orig_vars.pop(slots_var) - return metaclass(cls.__name__, cls.__bases__, orig_vars) - return wrapper diff --git a/futurize.py b/futurize.py index b69785a2..09feaf59 100755 --- a/futurize.py +++ b/futurize.py @@ -3,23 +3,22 @@ futurize.py =========== -Like Armin Ronacher's ``modernize.py``, but using the ``future`` package rather than a direct dependency on ``six``'. +This script is only used by the unit tests. 
Another script called +"futurize" is created automatically (without the .py extension) by +setuptools. -futurize.py attempts to turn Py2 code into valid, clean Py3 code that is also -compatible with Py2 when using the ``future`` package. +futurize.py attempts to turn Py2 code into valid, clean Py3 code that is +also compatible with Py2 when using the ``future`` package. Licensing --------- -Copyright 2013 Python Charmers Pty Ltd, Australia. +Copyright 2013-2024 Python Charmers, Australia. The software is distributed under an MIT licence. See LICENSE.txt. """ -import os +import sys from libfuturize.main import main -# We use os._exit() because sys.exit() seems to interact badly with -# subprocess.check_output() ... -os._exit(main()) - +sys.exit(main()) diff --git a/libfuturize/fixes2/fix_raise.py b/libfuturize/fixes2/fix_raise.py deleted file mode 100644 index 3e8323de..00000000 --- a/libfuturize/fixes2/fix_raise.py +++ /dev/null @@ -1,73 +0,0 @@ -"""Fixer for 'raise E, V' - -From Armin Ronacher's ``python-modernize``. - -raise -> raise -raise E -> raise E -raise E, V -> raise E(V) - -raise (((E, E'), E''), E'''), V -> raise E(V) - - -CAVEATS: -1) "raise E, V" will be incorrectly translated if V is an exception - instance. The correct Python 3 idiom is - - raise E from V - - but since we can't detect instance-hood by syntax alone and since - any client code would have to be changed as well, we don't automate - this. 
-""" -# Author: Collin Winter, Armin Ronacher - -# Local imports -from lib2to3 import pytree, fixer_base -from lib2to3.pgen2 import token -from lib2to3.fixer_util import Name, Call, is_tuple - -class FixRaise(fixer_base.BaseFix): - - BM_compatible = True - PATTERN = """ - raise_stmt< 'raise' exc=any [',' val=any] > - """ - - def transform(self, node, results): - syms = self.syms - - exc = results["exc"].clone() - if exc.type == token.STRING: - msg = "Python 3 does not support string exceptions" - self.cannot_convert(node, msg) - return - - # Python 2 supports - # raise ((((E1, E2), E3), E4), E5), V - # as a synonym for - # raise E1, V - # Since Python 3 will not support this, we recurse down any tuple - # literals, always taking the first element. - if is_tuple(exc): - while is_tuple(exc): - # exc.children[1:-1] is the unparenthesized tuple - # exc.children[1].children[0] is the first element of the tuple - exc = exc.children[1].children[0].clone() - exc.prefix = u" " - - if "val" not in results: - # One-argument raise - new = pytree.Node(syms.raise_stmt, [Name(u"raise"), exc]) - new.prefix = node.prefix - return new - - val = results["val"].clone() - if is_tuple(val): - args = [c.clone() for c in val.children[1:-1]] - else: - val.prefix = u"" - args = [val] - - return pytree.Node(syms.raise_stmt, - [Name(u"raise"), Call(exc, args)], - prefix=node.prefix) diff --git a/libfuturize/fixes3/__init__.py b/libfuturize/fixes3/__init__.py deleted file mode 100644 index adbf10d9..00000000 --- a/libfuturize/fixes3/__init__.py +++ /dev/null @@ -1,54 +0,0 @@ -import sys -from lib2to3 import refactor - -# The original set of these fixes comes from lib3to2 (https://bitbucket.org/amentajo/lib3to2): -libfuturize_3fix_names = set([ - 'libfuturize.fixes2.fix_add__future__imports', # from __future__ import absolute_import etc. 
on separate lines - 'libfuturize.fixes2.fix_add_future_standard_library_import', # we force adding this import for now, even if it doesn't seem necessary to the fix_future_standard_library fixer, for ease of testing - 'libfuturize.fixes2.fix_order___future__imports', # consolidates to a single line to simplify testing - 'libfuturize.fixes3.fix_future_builtins', # adds "from future.builtins import *" - 'libfuturize.fixes2.fix_future_standard_library', # adds "from future import standard_library" - - 'libfuturize.fixes3.fix_annotations', - # 'libfuturize.fixes3.fix_bitlength', # ints have this in Py2.7 - # 'libfuturize.fixes3.fix_bool', # need a decorator or Mixin - # 'libfuturize.fixes3.fix_bytes', # leave bytes as bytes - # 'libfuturize.fixes3.fix_classdecorator', # available in - # Py2.6+ - # 'libfuturize.fixes3.fix_collections', hmmm ... - # 'libfuturize.fixes3.fix_dctsetcomp', # avail in Py27 - 'libfuturize.fixes3.fix_division', # yes - # 'libfuturize.fixes3.fix_except', # avail in Py2.6+ - # 'libfuturize.fixes3.fix_features', # ? - 'libfuturize.fixes3.fix_fullargspec', - # 'libfuturize.fixes3.fix_funcattrs', - 'libfuturize.fixes3.fix_getcwd', - 'libfuturize.fixes3.fix_imports', # adds "from future import standard_library" - 'libfuturize.fixes3.fix_imports2', - # 'libfuturize.fixes3.fix_input', - # 'libfuturize.fixes3.fix_int', - # 'libfuturize.fixes3.fix_intern', - # 'libfuturize.fixes3.fix_itertools', - 'libfuturize.fixes3.fix_kwargs', # yes, we want this - # 'libfuturize.fixes3.fix_memoryview', - # 'libfuturize.fixes3.fix_metaclass', # write a custom handler for - # this - # 'libfuturize.fixes3.fix_methodattrs', # __func__ and __self__ seem to be defined on Py2.7 already - 'libfuturize.fixes3.fix_newstyle', # yes, we want this: explicit inheritance from object. Without new-style classes in Py2, super() will break etc. 
- # 'libfuturize.fixes3.fix_next', # use a decorator for this - # 'libfuturize.fixes3.fix_numliterals', # prob not - # 'libfuturize.fixes3.fix_open', # huh? - # 'libfuturize.fixes3.fix_print', # no way - 'libfuturize.fixes3.fix_printfunction', # adds __future__ import print_function - 'libfuturize.fixes3.fix_raise', # yes, if 'raise E, V, T' is supported on Py3 - # 'libfuturize.fixes3.fix_range', # nope - # 'libfuturize.fixes3.fix_reduce', - # 'libfuturize.fixes3.fix_setliteral', - # 'libfuturize.fixes3.fix_str', - # 'libfuturize.fixes3.fix_super', # maybe, if our magic super() isn't robust enough - 'libfuturize.fixes3.fix_throw', # yes, if Py3 supports it - # 'libfuturize.fixes3.fix_unittest', - 'libfuturize.fixes3.fix_unpacking', # yes, this is useful - # 'libfuturize.fixes3.fix_with' # way out of date - ]) - diff --git a/libfuturize/fixes3/fix_future_builtins.py b/libfuturize/fixes3/fix_future_builtins.py deleted file mode 100644 index d8229806..00000000 --- a/libfuturize/fixes3/fix_future_builtins.py +++ /dev/null @@ -1,11 +0,0 @@ -""" -For the ``future`` package. - -Adds this import line: - - from future.builtins import * - -after any other imports (in an initial block of them). -""" - -from ..fixes2.fix_future_builtins import FixFutureBuiltins diff --git a/libfuturize/fixes3/fix_newstyle.py b/libfuturize/fixes3/fix_newstyle.py deleted file mode 100644 index 6420e94d..00000000 --- a/libfuturize/fixes3/fix_newstyle.py +++ /dev/null @@ -1,22 +0,0 @@ -u""" -Fixer for "class Foo: ..." -> "class Foo(object): ..." 
-""" - -from lib2to3 import fixer_base -from lib2to3.fixer_util import Node, Leaf, token, syms, LParen, RParen, Name -# from lib2to3.fixer_util import Name, syms, Node, Leaf, Newline, find_root -from lib2to3.pygram import token - -def insert_object(node, idx): - node.insert_child(idx, RParen()) - node.insert_child(idx, Name(u"object")) - node.insert_child(idx, LParen()) - -class FixNewstyle(fixer_base.BaseFix): - - PATTERN = u"classdef< 'class' NAME colon=':' any >" - - def transform(self, node, results): - colon = results[u"colon"] - idx = node.children.index(colon) - insert_object(node, idx) diff --git a/libfuturize/main.py b/libfuturize/main.py deleted file mode 100644 index 6e5919c6..00000000 --- a/libfuturize/main.py +++ /dev/null @@ -1,221 +0,0 @@ -""" -futurize: automatic conversion to clean 2&3 code using ``python-future`` -====================================================================== - -Like Armin Ronacher's modernize.py, ``futurize`` attempts to produce clean -standard Python 3 code that runs on both Py2 and Py3. - -One pass --------- - -Use it like this on Python 2 code: - - $ futurize --verbose mypython2script.py - -This will attempt to port the code to standard Py3 code that also -provides Py2 compatibility with the help of the right imports from -``future``. To write the changes to disk, use the -w flag. - -Or, to make existing Python 3 code compatible with both Python 2 and 3 -using the ``future`` package: - - $ futurize --from3 --verbose mypython3script.py - -which removes any Py3-only syntax (e.g. new metaclasses) and adds these -import lines: - - from __future__ import (absolute_import, division, - print_function, unicode_literals) - from future import standard_library - from future.builtins import * - -To write changes to the files, use the -w flag. - -Two stages ----------- - -The ``futurize`` script can also be called in two separate stages. 
First: - - $ futurize --stage1 mypython2script.py - -This produces more modern Python 2 code that is not yet compatible with Python -3. The tests should still run and the diff should be uncontroversial to apply to -most Python projects that are willing to drop support for Python 2.5 and lower. - -After this, the recommended approach is to explicitly mark all strings that must -be byte-strings with a b'' prefix, and then invoke the second stage with: - - $ futurize --stage2 mypython2script.py - -This implicitly turns all unadorned string literals into unicode strings (Py3 -str) and makes the additional changes needed to support Python 3. This stage -introduces a dependency on ``future`` to restore Py2 support. - -If you would prefer instead to mark all your text strings explicitly with u'' -prefixes and have all unadorned '' strings converted to byte-strings, use this: - - $ futurize --stage2 --tobytes mypython2script.py - -Note that this even includes docstrings. - -Separate stages are not available (or needed) when converting from Python 3. -""" - -from __future__ import (absolute_import, print_function, unicode_literals) -from future.builtins import * - -import sys -import logging -import optparse - -from lib2to3.main import main, warn, StdoutRefactoringTool -from lib2to3 import refactor - -from libfuturize.fixes2 import (lib2to3_fix_names_stage1, - lib2to3_fix_names_stage2, - libfuturize_2fix_names_stage1, - libfuturize_2fix_names_stage2) -from libfuturize.fixes3 import libfuturize_3fix_names - - -def main(args=None): - """Main program. - - Returns a suggested exit status (0, 1, 2). 
- """ - # Set up option parser - parser = optparse.OptionParser(usage="futurize [options] file|dir ...") - parser.add_option("-a", "--all-imports", action="store_true", - help="Adds all __future__ and future imports to each module") - parser.add_option("-d", "--doctests_only", action="store_true", - help="Fix up doctests only") - parser.add_option("-b", "--tobytes", action="store_true", - help="Convert all unadorned string literals to bytes objects") - parser.add_option("-1", "--stage1", action="store_true", - help="Modernize Python 2 code only; no compatibility with Python 3 (or dependency on ``future``)") - parser.add_option("-2", "--stage2", action="store_true", - help="Take modernized (stage1) code and add a dependency on ``future`` to provide Py3 compatibility.") - parser.add_option("-0", "--both-stages", action="store_true", - help="Apply both stages 1 and 2") - # parser.add_option("-f", "--fix", action="append", default=[], - # help="Each FIX specifies a transformation; default: all") - parser.add_option("-j", "--processes", action="store", default=1, - type="int", help="Run 2to3 concurrently") - parser.add_option("-x", "--nofix", action="append", default=[], - help="Prevent a fixer from being run.") - parser.add_option("-l", "--list-fixes", action="store_true", - help="List available transformations") - # parser.add_option("-p", "--print-function", action="store_true", - # help="Modify the grammar so that print() is a function") - parser.add_option("-v", "--verbose", action="store_true", - help="More verbose logging") - parser.add_option("--no-diffs", action="store_true", - help="Don't show diffs of the refactoring") - parser.add_option("-w", "--write", action="store_true", - help="Write back modified files") - parser.add_option("-n", "--nobackups", action="store_true", default=False, - help="Don't write backups for modified files.") - parser.add_option("--from3", action="store_true", default=False, - help="Assume the code is already Python 3 and just " - 
"requires ``__future__`` and ``future`` imports.") - - # Parse command line arguments - refactor_stdin = False - flags = {} - options, args = parser.parse_args(args) - if options.from3: - assert not (options.stage1 or options.stage2) - assert not options.tobytes - fixer_pkg = 'libfuturize.fixes3' - avail_fixes = libfuturize_3fix_names - flags["print_function"] = True - else: - fixer_pkg = 'libfuturize.fixes2' - avail_fixes = set() - if not (options.stage1 or options.stage2): - options.both_stages = True - else: - assert options.both_stages is None - options.both_stages = False - if options.stage1 or options.both_stages: - avail_fixes.update(lib2to3_fix_names_stage1) - avail_fixes.update(libfuturize_2fix_names_stage1) - if options.stage2 or options.both_stages: - avail_fixes.update(lib2to3_fix_names_stage2) - avail_fixes.update(libfuturize_2fix_names_stage2) - - if options.tobytes: - avail_fixes.add('libfuturize.fixes2.fix_bytes') - if not options.write and options.no_diffs: - warn("not writing files and not printing diffs; that's not very useful") - if not options.write and options.nobackups: - parser.error("Can't use -n without -w") - if options.list_fixes: - print("Available transformations for the -f/--fix option:") - for fixname in sorted(avail_fixes): - print(fixname) - if not args: - return 0 - if not args: - print("At least one file or directory argument required.", - file=sys.stderr) - print("Use --help to show usage.", file=sys.stderr) - return 2 - if "-" in args: - refactor_stdin = True - if options.write: - print("Can't write to stdin.", file=sys.stderr) - return 2 - - # If this option were ever needed, it would probably mean the --from3 flag - # had been forgotten. 
- # if options.print_function: - # flags["print_function"] = True - - # Set up logging handler - level = logging.DEBUG if options.verbose else logging.INFO - logging.basicConfig(format='%(name)s: %(message)s', level=level) - - # Initialize the refactoring tool - unwanted_fixes = set(fixer_pkg + ".fix_" + fix for fix in options.nofix) - - # The 'all-imports' option forces adding all imports __future__ and "from - # future import standard_library", even if they don't seem necessary for - # the current state of each module. (This can simplify testing, and can - # reduce the need to think about Py2 compatibility when editing the code - # further.) - extra_fixes = set() - if options.all_imports: - prefix = 'libfuturize.fixes2.' - if options.stage1: - extra_fixes.add(prefix + - 'fix_add__future__imports_except_unicode_literals') - else: - # In case the user hasn't run stage1 for some reason: - extra_fixes.add(prefix + 'fix_add__future__imports') - extra_fixes.add(prefix + 'fix_add_future_standard_library_import') - extra_fixes.add(prefix + 'fix_add_all_future_builtins') - - fixer_names = avail_fixes | extra_fixes - unwanted_fixes - - rt = StdoutRefactoringTool(sorted(fixer_names), flags, set(), - options.nobackups, not options.no_diffs) - - # Refactor all files and directories passed as arguments - if not rt.errors: - if refactor_stdin: - rt.refactor_stdin() - else: - try: - rt.refactor(args, options.write, options.doctests_only, - options.processes) - except refactor.MultiprocessingUnsupported: - assert options.processes > 1 - print("Sorry, -j isn't " \ - "supported on this platform.", file=sys.stderr) - return 1 - rt.summarize() - - # Return error status (0 if rt.errors is zero) - return int(bool(rt.errors)) - diff --git a/libfuturize/test_scripts/py2/check_super_2to3.py b/libfuturize/test_scripts/py2/check_super_2to3.py deleted file mode 100644 index f46e03df..00000000 --- a/libfuturize/test_scripts/py2/check_super_2to3.py +++ /dev/null @@ -1,11 +0,0 @@ -""" -This 
tests whether futurize keeps the old two-argument super() calls the -same as before. It should, because this still works in Py3. -""" -from __future__ import print_function -from future.builtins import * - -class VerboseList(list): - def append(self, item): - print('Adding an item') - super(VerboseList, self).append(item) diff --git a/libfuturize/test_scripts/py2/check_super_2to3.py2 b/libfuturize/test_scripts/py2/check_super_2to3.py2 deleted file mode 100644 index 65b24354..00000000 --- a/libfuturize/test_scripts/py2/check_super_2to3.py2 +++ /dev/null @@ -1,9 +0,0 @@ -""" -This tests whether futurize keeps the old two-argument super() calls the -same as before. It should, because this still works in Py3. -""" - -class VerboseList(list): - def append(self, item): - print 'Adding an item' - super(VerboseList, self).append(item) diff --git a/libfuturize/test_scripts/py2/implicit_relative_import.py b/libfuturize/test_scripts/py2/implicit_relative_import.py deleted file mode 100644 index 6653d744..00000000 --- a/libfuturize/test_scripts/py2/implicit_relative_import.py +++ /dev/null @@ -1,8 +0,0 @@ -''' -Tests whether implicit relative imports are turned into explicit ones. -''' - -from __future__ import absolute_import -from future.builtins import * - -from . import xrange diff --git a/libfuturize/test_scripts/py2/implicit_relative_import.py2 b/libfuturize/test_scripts/py2/implicit_relative_import.py2 deleted file mode 100644 index 3af07a00..00000000 --- a/libfuturize/test_scripts/py2/implicit_relative_import.py2 +++ /dev/null @@ -1,5 +0,0 @@ -''' -Tests whether implicit relative imports are turned into explicit ones. 
-''' - -import xrange diff --git a/libfuturize/test_scripts/py2/old_exception_print.py b/libfuturize/test_scripts/py2/old_exception_print.py deleted file mode 100644 index 2e90958d..00000000 --- a/libfuturize/test_scripts/py2/old_exception_print.py +++ /dev/null @@ -1,12 +0,0 @@ -""" -Test of whether futurize handles the old-style exception Syntax -""" - -from __future__ import print_function -from future.builtins import * - -def hello(): - try: - print("Hello, world") - except IOError as e: - print(e.errno) diff --git a/libfuturize/test_scripts/py2/old_exception_print.py2 b/libfuturize/test_scripts/py2/old_exception_print.py2 deleted file mode 100644 index cc8621d5..00000000 --- a/libfuturize/test_scripts/py2/old_exception_print.py2 +++ /dev/null @@ -1,8 +0,0 @@ -""" -Test of whether futurize handles the old-style exception Syntax -""" -def hello(): - try: - print "Hello, world" - except IOError, e: - print e.errno diff --git a/libfuturize/test_scripts/py2/print_range.py b/libfuturize/test_scripts/py2/print_range.py deleted file mode 100644 index ff0b2c61..00000000 --- a/libfuturize/test_scripts/py2/print_range.py +++ /dev/null @@ -1,4 +0,0 @@ -from __future__ import print_function -from future.builtins import * - -print(list(range(10))) diff --git a/libfuturize/test_scripts/py2/print_range.py2 b/libfuturize/test_scripts/py2/print_range.py2 deleted file mode 100644 index 78e3caa1..00000000 --- a/libfuturize/test_scripts/py2/print_range.py2 +++ /dev/null @@ -1 +0,0 @@ -print range(10) diff --git a/libfuturize/test_scripts/py2/print_raw_input.py b/libfuturize/test_scripts/py2/print_raw_input.py deleted file mode 100644 index c477c7be..00000000 --- a/libfuturize/test_scripts/py2/print_raw_input.py +++ /dev/null @@ -1,15 +0,0 @@ -""" -Example Python 2 code with print statement and raw_input(). - -Check: does libfuturize automatically handle this? 
-""" - -from __future__ import print_function -from future.builtins import * - -def greet(name): - print("Hello, {0}!".format(name)) - -print("What's your name?") -name = input() -greet(name) diff --git a/libfuturize/test_scripts/py2/print_raw_input.py2 b/libfuturize/test_scripts/py2/print_raw_input.py2 deleted file mode 100644 index d0164912..00000000 --- a/libfuturize/test_scripts/py2/print_raw_input.py2 +++ /dev/null @@ -1,12 +0,0 @@ -""" -Example Python 2 code with print statement and raw_input(). - -Check: does libfuturize automatically handle this? -""" - -def greet(name): - print "Hello, {0}!".format(name) - -print "What's your name?" -name = raw_input() -greet(name) diff --git a/libfuturize/test_scripts/py2/print_stderr.py b/libfuturize/test_scripts/py2/print_stderr.py deleted file mode 100644 index 944f9462..00000000 --- a/libfuturize/test_scripts/py2/print_stderr.py +++ /dev/null @@ -1,6 +0,0 @@ -from __future__ import print_function -from future.builtins import * - -import sys -print('Hello', ' Ed', file=sys.stderr) - diff --git a/libfuturize/test_scripts/py2/print_stderr.py2 b/libfuturize/test_scripts/py2/print_stderr.py2 deleted file mode 100644 index 6ab90340..00000000 --- a/libfuturize/test_scripts/py2/print_stderr.py2 +++ /dev/null @@ -1,3 +0,0 @@ -import sys -print >> sys.stderr, 'Hello', ' Ed' - diff --git a/libfuturize/test_scripts/py2/problematic_string.py b/libfuturize/test_scripts/py2/problematic_string.py deleted file mode 100644 index e8cc6399..00000000 --- a/libfuturize/test_scripts/py2/problematic_string.py +++ /dev/null @@ -1,5 +0,0 @@ -r""" This docstring generates a SyntaxError on Python 3 unless it has -an r prefix. - -The folder is "C:\Users". 
-""" diff --git a/libfuturize/test_scripts/py2/problematic_string.py2 b/libfuturize/test_scripts/py2/problematic_string.py2 deleted file mode 100644 index e0784abc..00000000 --- a/libfuturize/test_scripts/py2/problematic_string.py2 +++ /dev/null @@ -1,5 +0,0 @@ -""" This docstring generates a SyntaxError on Python 3 unless it has -an r prefix. - -The folder is "C:\Users". -""" diff --git a/libfuturize/test_scripts/py2/source_coding_latin1.py b/libfuturize/test_scripts/py2/source_coding_latin1.py deleted file mode 100644 index 9630515e..00000000 --- a/libfuturize/test_scripts/py2/source_coding_latin1.py +++ /dev/null @@ -1,8 +0,0 @@ -# -*- coding: latin-1 -*- -''' -Tests to ensure that the source coding line is not corrupted or removed. -Also tests whether the unicode characters in this encoding are parsed -correctly and left alone. -''' - -characters = [u"", u"", ""] diff --git a/libfuturize/test_scripts/py2/source_coding_latin1.py2 b/libfuturize/test_scripts/py2/source_coding_latin1.py2 deleted file mode 100644 index 9630515e..00000000 --- a/libfuturize/test_scripts/py2/source_coding_latin1.py2 +++ /dev/null @@ -1,8 +0,0 @@ -# -*- coding: latin-1 -*- -''' -Tests to ensure that the source coding line is not corrupted or removed. -Also tests whether the unicode characters in this encoding are parsed -correctly and left alone. -''' - -characters = [u"", u"", ""] diff --git a/libfuturize/test_scripts/py2/source_coding_utf8.py b/libfuturize/test_scripts/py2/source_coding_utf8.py deleted file mode 100644 index 4fefa76d..00000000 --- a/libfuturize/test_scripts/py2/source_coding_utf8.py +++ /dev/null @@ -1,8 +0,0 @@ -# -*- coding: utf-8 -*- -''' -Tests to ensure that the source coding line is not corrupted or removed. -Also tests whether the unicode characters in this encoding are parsed -correctly and left alone. 
-''' - -icons = [u"◐", u"◓", u"◑", u"◒"] diff --git a/libfuturize/test_scripts/py2/source_coding_utf8.py2 b/libfuturize/test_scripts/py2/source_coding_utf8.py2 deleted file mode 100644 index 4fefa76d..00000000 --- a/libfuturize/test_scripts/py2/source_coding_utf8.py2 +++ /dev/null @@ -1,8 +0,0 @@ -# -*- coding: utf-8 -*- -''' -Tests to ensure that the source coding line is not corrupted or removed. -Also tests whether the unicode characters in this encoding are parsed -correctly and left alone. -''' - -icons = [u"◐", u"◓", u"◑", u"◒"] diff --git a/libfuturize/test_scripts/py2/stdlib_newstyleclass_printstatement.py b/libfuturize/test_scripts/py2/stdlib_newstyleclass_printstatement.py deleted file mode 100644 index 2f9a52f4..00000000 --- a/libfuturize/test_scripts/py2/stdlib_newstyleclass_printstatement.py +++ /dev/null @@ -1,9 +0,0 @@ -from __future__ import print_function -from future import standard_library -from future.builtins import * - -import configparser - -class Blah(object): - pass -print('Hello', end=None) diff --git a/libfuturize/test_scripts/py2/stdlib_newstyleclass_printstatement.py2 b/libfuturize/test_scripts/py2/stdlib_newstyleclass_printstatement.py2 deleted file mode 100644 index 42089aac..00000000 --- a/libfuturize/test_scripts/py2/stdlib_newstyleclass_printstatement.py2 +++ /dev/null @@ -1,5 +0,0 @@ -import ConfigParser - -class Blah(object): - pass -print 'Hello', diff --git a/libfuturize/test_scripts/py2/stdlib_print_statement.py b/libfuturize/test_scripts/py2/stdlib_print_statement.py deleted file mode 100644 index 23374c77..00000000 --- a/libfuturize/test_scripts/py2/stdlib_print_statement.py +++ /dev/null @@ -1,7 +0,0 @@ -from __future__ import absolute_import -from __future__ import print_function -from future import standard_library -from future.builtins import * - -import socketserver -print('blah') diff --git a/libfuturize/test_scripts/py2/stdlib_print_statement.py2 b/libfuturize/test_scripts/py2/stdlib_print_statement.py2 deleted file 
mode 100644 index 8d7d13ea..00000000 --- a/libfuturize/test_scripts/py2/stdlib_print_statement.py2 +++ /dev/null @@ -1,2 +0,0 @@ -import SocketServer -print 'blah' diff --git a/libfuturize/test_scripts/py2/stdlib_print_statement___future___absolute_import.py b/libfuturize/test_scripts/py2/stdlib_print_statement___future___absolute_import.py deleted file mode 100644 index 550abcf1..00000000 --- a/libfuturize/test_scripts/py2/stdlib_print_statement___future___absolute_import.py +++ /dev/null @@ -1,10 +0,0 @@ -""" -Tests whether the existing __future__ statement is preserved and not -duplicated or moved below some executable statement. -""" - -from __future__ import absolute_import -from __future__ import print_function - -import socketserver -print('blah') diff --git a/libfuturize/test_scripts/py2/stdlib_print_statement___future___absolute_import.py2 b/libfuturize/test_scripts/py2/stdlib_print_statement___future___absolute_import.py2 deleted file mode 100644 index 17177cdc..00000000 --- a/libfuturize/test_scripts/py2/stdlib_print_statement___future___absolute_import.py2 +++ /dev/null @@ -1,7 +0,0 @@ -""" -Tests whether the existing __future__ statement is preserved and not -duplicated or moved below some executable statement. -""" - -import SocketServer -print 'blah' diff --git a/libfuturize/test_scripts/py2/unicode_literals.py b/libfuturize/test_scripts/py2/unicode_literals.py deleted file mode 100644 index d82a82a5..00000000 --- a/libfuturize/test_scripts/py2/unicode_literals.py +++ /dev/null @@ -1,11 +0,0 @@ -''' -Tests to ensure that the u'' and b'' prefixes on unicode and byte strings -are not removed. Removing the prefixes on Py3.3+ is unnecessary and -loses some information -- namely, that the strings have explicitly been -marked as unicode, rather than just our guess (perhaps incorrect) that -they should be unicode or bytes. 
-''' - -s = u'mystring' -b = b'mybytes' -icons = [u"◐", u"◓", u"◑", u"◒"] diff --git a/libfuturize/test_scripts/py2/unicode_literals.py2 b/libfuturize/test_scripts/py2/unicode_literals.py2 deleted file mode 100644 index d82a82a5..00000000 --- a/libfuturize/test_scripts/py2/unicode_literals.py2 +++ /dev/null @@ -1,11 +0,0 @@ -''' -Tests to ensure that the u'' and b'' prefixes on unicode and byte strings -are not removed. Removing the prefixes on Py3.3+ is unnecessary and -loses some information -- namely, that the strings have explicitly been -marked as unicode, rather than just our guess (perhaps incorrect) that -they should be unicode or bytes. -''' - -s = u'mystring' -b = b'mybytes' -icons = [u"◐", u"◓", u"◑", u"◒"] diff --git a/libfuturize/test_scripts/py2/xrange.py b/libfuturize/test_scripts/py2/xrange.py deleted file mode 100644 index a3379b7f..00000000 --- a/libfuturize/test_scripts/py2/xrange.py +++ /dev/null @@ -1,4 +0,0 @@ -from future.builtins import * - -for i in range(10): - pass diff --git a/libfuturize/test_scripts/py2/xrange.py2 b/libfuturize/test_scripts/py2/xrange.py2 deleted file mode 100644 index 3126104c..00000000 --- a/libfuturize/test_scripts/py2/xrange.py2 +++ /dev/null @@ -1,3 +0,0 @@ - -for i in xrange(10): - pass diff --git a/libfuturize/test_scripts/py3/example_py3_raw_input.py b/libfuturize/test_scripts/py3/example_py3_raw_input.py deleted file mode 100644 index d294aa7d..00000000 --- a/libfuturize/test_scripts/py3/example_py3_raw_input.py +++ /dev/null @@ -1,14 +0,0 @@ -""" -Example Python 3 code - -Does libfuturize --from3 handle this, or does it add an evil eval() to the -input() call? 
- -It should also add 'from __future__ import print_function' -""" -def greet(name): - print("Hello, {0}!".format(name)) - -print("What's your name?") -name = input() -greet(name) diff --git a/libfuturize/test_scripts/py3/implrelimport_printfunction.py b/libfuturize/test_scripts/py3/implrelimport_printfunction.py deleted file mode 100644 index bf96e8e1..00000000 --- a/libfuturize/test_scripts/py3/implrelimport_printfunction.py +++ /dev/null @@ -1,3 +0,0 @@ -import emptymodule -print('Hello', ' Ed', file=sys.stderr) - diff --git a/libfuturize/test_scripts/py3/imports_with_existing___future__import.py b/libfuturize/test_scripts/py3/imports_with_existing___future__import.py deleted file mode 100644 index 2d05bceb..00000000 --- a/libfuturize/test_scripts/py3/imports_with_existing___future__import.py +++ /dev/null @@ -1,22 +0,0 @@ -""" -An example Python 3 script with an existing __future__ import. -We don't want libfuturize to clobber or duplicate this ... -""" - -from __future__ import absolute_import - -import urllib.parse -import urllib.request -import urllib.error -import http.client -import email.message -import io -import unittest -from test import support -import os -import sys -import tempfile - -from base64 import b64encode -import collections - diff --git a/libfuturize/test_scripts/py3/kwonlyargs.py b/libfuturize/test_scripts/py3/kwonlyargs.py deleted file mode 100644 index 686eec8a..00000000 --- a/libfuturize/test_scripts/py3/kwonlyargs.py +++ /dev/null @@ -1,3 +0,0 @@ -def f(a, b, *, c=True, d='blah'): - pass - diff --git a/libfuturize/test_scripts/py3/print_range.py b/libfuturize/test_scripts/py3/print_range.py deleted file mode 100644 index 61b4000c..00000000 --- a/libfuturize/test_scripts/py3/print_range.py +++ /dev/null @@ -1 +0,0 @@ -print(range(10)) diff --git a/libfuturize/test_scripts/py3/stdlib_newstyleclass_printfunction.py b/libfuturize/test_scripts/py3/stdlib_newstyleclass_printfunction.py deleted file mode 100644 index cc5763b9..00000000 --- 
a/libfuturize/test_scripts/py3/stdlib_newstyleclass_printfunction.py +++ /dev/null @@ -1,5 +0,0 @@ -import configparser - -class Blah: - pass -print('Hello', end=None) diff --git a/libfuturize/test_scripts/py3/test_py3_urllib_request.py b/libfuturize/test_scripts/py3/test_py3_urllib_request.py deleted file mode 100644 index 2051338b..00000000 --- a/libfuturize/test_scripts/py3/test_py3_urllib_request.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -Example Python 3 code using the new urllib.request module. - -Does libfuturize handle this? -""" -URL = 'http://pypi.python.org/pypi/{}/json' - -package = 'future' - -import pprint -# import requests -# -# r = requests.get(URL.format(package)) -# pprint.pprint(r.json()) - -import urllib.request -r = urllib.request.urlopen(URL.format(package_name)) -pprint.pprint(r.read()) diff --git a/pasteurize.py b/pasteurize.py new file mode 100755 index 00000000..658955f6 --- /dev/null +++ b/pasteurize.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python +""" +pasteurize.py +============= + +This script is only used by the unit tests. Another script called "pasteurize" +is created automatically (without the .py extension) by setuptools. + +pasteurize.py attempts to turn Py3 code into relatively clean Py3 code that is +also compatible with Py2 when using the ``future`` package. + + +Licensing +--------- +Copyright 2013-2024 Python Charmers, Australia. +The software is distributed under an MIT licence. See LICENSE.txt. 
+""" + +import sys + +from libpasteurize.main import main + +sys.exit(main()) diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..649908f0 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +# py.test config file +[pytest] +norecursedirs = build docs/_build disabled_test_email disabled_test_xmlrpc disabled_test_xmlrpcnet disabled/* disabled* disabled/test_email/* diff --git a/requirements_py26.txt b/requirements_py26.txt deleted file mode 100644 index b2ed2669..00000000 --- a/requirements_py26.txt +++ /dev/null @@ -1,3 +0,0 @@ -unittest2 -argparse # for the http.server module - diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..498ec14a --- /dev/null +++ b/setup.cfg @@ -0,0 +1,2 @@ +[metadata] +license_file = LICENSE.txt diff --git a/setup.py b/setup.py old mode 100644 new mode 100755 index 47783245..13b0f435 --- a/setup.py +++ b/setup.py @@ -1,82 +1,184 @@ #!/usr/bin/env python +from __future__ import absolute_import, print_function + import os +import os.path import sys + + try: from setuptools import setup except ImportError: from distutils.core import setup -import future - - -if sys.argv[-1] == 'publish': - os.system('python setup.py sdist upload') - sys.exit() NAME = "future" PACKAGES = ["future", "future.builtins", - "future.builtins.backports", - "future.tests", + "future.types", "future.standard_library", - "future.standard_library.html", - "future.standard_library.http", - "future.standard_library.test", + "future.backports", + "future.backports.email", + "future.backports.email.mime", + "future.backports.html", + "future.backports.http", + "future.backports.test", + "future.backports.urllib", + "future.backports.xmlrpc", + "future.moves", + "future.moves.dbm", + "future.moves.html", + "future.moves.http", + "future.moves.test", + "future.moves.tkinter", + "future.moves.urllib", + "future.moves.xmlrpc", + "future.tests", # for future.tests.base + # "future.tests.test_email", "future.utils", + "past", + 
"past.builtins", + "past.types", + "past.utils", + "past.translation", "libfuturize", - "libfuturize.fixes2", - "libfuturize.fixes3"] + "libfuturize.fixes", + "libpasteurize", + "libpasteurize.fixes", + ] + +# PEP 3108 stdlib moves: +if sys.version_info[:2] < (3, 0): + PACKAGES += [ + "builtins", + # "configparser", # removed in v0.16.0 + "copyreg", + "html", + "http", + "queue", + "reprlib", + "socketserver", + "tkinter", + "winreg", + "xmlrpc", + "_dummy_thread", + "_markupbase", + "_thread", + ] + PACKAGE_DATA = {'': [ 'README.rst', 'LICENSE.txt', 'futurize.py', - 'discover_tests.py', - 'check_rst.sh' - ]} -REQUIRES = [] -VERSION = future.__version__ + 'pasteurize.py', + 'check_rst.sh', + 'TESTING.txt', + ], + 'tests': ['*.py'], + } + +import src.future +VERSION = src.future.__version__ DESCRIPTION = "Clean single-source support for Python 3 and 2" -LONG_DESC = future.__doc__ +LONG_DESC = src.future.__doc__ AUTHOR = "Ed Schofield" AUTHOR_EMAIL = "ed@pythoncharmers.com" -URL="https://github.com/PythonCharmers/python-future" +URL="https://python-future.org" LICENSE = "MIT" -KEYWORDS = "future python3 migration backport six 2to3 futurize modernize" +KEYWORDS = "future past python3 migration futurize backport six 2to3 modernize pasteurize 3to2" CLASSIFIERS = [ "Programming Language :: Python", + "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.3", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "License :: OSI Approved", "License :: OSI Approved :: MIT License", - "Development Status 
:: 4 - Beta", + "Development Status :: 6 - Mature", "Intended Audience :: Developers", ] setup_kwds = {} + +# * Important * +# We forcibly remove the build folder to avoid breaking the +# user's Py3 installation if they run "python2 setup.py +# build" and then "python3 setup.py install". + +try: + # If the user happens to run: + # python2 setup.py build + # python3 setup.py install + # then folders like "copyreg" will be in build/lib. + # If so, we CANNOT let the user install this, because + # this may break his/her Python 3 install, depending on the folder order in + # sys.path. (Running "import html" etc. may pick up our Py2 + # substitute packages, instead of the intended system stdlib modules.) + SYSTEM_MODULES = set([ + '_dummy_thread', + '_markupbase', + '_thread', + 'builtins', + # Catch the case that configparser is in the build folder + # from a previous version of `future`: + 'configparser', + 'copyreg', + 'html', + 'http', + 'queue', + 'reprlib', + 'socketserver', + 'tkinter', + 'winreg', + 'xmlrpc' + ]) + + if sys.version_info[0] >= 3: + # Do any of the above folders exist in build/lib? + files = os.listdir(os.path.join('build', 'lib')) + if len(set(files) & set(SYSTEM_MODULES)) > 0: + print('ERROR: Your build folder is in an inconsistent state for ' + 'a Python 3.x install. 
Please remove it manually and run ' + 'setup.py again.', file=sys.stderr) + sys.exit(1) +except OSError: + pass + setup(name=NAME, version=VERSION, author=AUTHOR, author_email=AUTHOR_EMAIL, url=URL, + project_urls={ + 'Source': 'https://github.com/PythonCharmers/python-future', + }, description=DESCRIPTION, long_description=LONG_DESC, license=LICENSE, keywords=KEYWORDS, entry_points={ 'console_scripts': [ - 'futurize = libfuturize.main:main' + 'futurize = libfuturize.main:main', + 'pasteurize = libpasteurize.main:main' ] }, + package_dir={'': 'src'}, packages=PACKAGES, package_data=PACKAGE_DATA, include_package_data=True, - install_requires=REQUIRES, + python_requires=">=2.6, !=3.0.*, !=3.1.*, !=3.2.*", classifiers=CLASSIFIERS, - test_suite = "discover_tests", **setup_kwds ) - diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 00000000..acdbb31a --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,2 @@ +# Make this a package only for the sake of importing +# src.future.__version__ etc. from setup.py diff --git a/src/_dummy_thread/__init__.py b/src/_dummy_thread/__init__.py new file mode 100644 index 00000000..63dced6e --- /dev/null +++ b/src/_dummy_thread/__init__.py @@ -0,0 +1,10 @@ +from __future__ import absolute_import +import sys +__future_module__ = True + +if sys.version_info[0] < 3: + from dummy_thread import * +else: + raise ImportError('This package should not be accessible on Python 3. ' + 'Either you are trying to run from the python-future src folder ' + 'or your installation of python-future is corrupted.') diff --git a/src/_markupbase/__init__.py b/src/_markupbase/__init__.py new file mode 100644 index 00000000..29090654 --- /dev/null +++ b/src/_markupbase/__init__.py @@ -0,0 +1,10 @@ +from __future__ import absolute_import +import sys +__future_module__ = True + +if sys.version_info[0] < 3: + from markupbase import * +else: + raise ImportError('This package should not be accessible on Python 3. 
' + 'Either you are trying to run from the python-future src folder ' + 'or your installation of python-future is corrupted.') diff --git a/src/_thread/__init__.py b/src/_thread/__init__.py new file mode 100644 index 00000000..9f2a51c7 --- /dev/null +++ b/src/_thread/__init__.py @@ -0,0 +1,10 @@ +from __future__ import absolute_import +import sys +__future_module__ = True + +if sys.version_info[0] < 3: + from thread import * +else: + raise ImportError('This package should not be accessible on Python 3. ' + 'Either you are trying to run from the python-future src folder ' + 'or your installation of python-future is corrupted.') diff --git a/src/builtins/__init__.py b/src/builtins/__init__.py new file mode 100644 index 00000000..4f936f28 --- /dev/null +++ b/src/builtins/__init__.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import +import sys +__future_module__ = True + +if sys.version_info[0] < 3: + from __builtin__ import * + # Overwrite any old definitions with the equivalent future.builtins ones: + from future.builtins import * +else: + raise ImportError('This package should not be accessible on Python 3. ' + 'Either you are trying to run from the python-future src folder ' + 'or your installation of python-future is corrupted.') diff --git a/src/copyreg/__init__.py b/src/copyreg/__init__.py new file mode 100644 index 00000000..51bd4b9a --- /dev/null +++ b/src/copyreg/__init__.py @@ -0,0 +1,9 @@ +from __future__ import absolute_import +import sys + +if sys.version_info[0] < 3: + from copy_reg import * +else: + raise ImportError('This package should not be accessible on Python 3. 
' + 'Either you are trying to run from the python-future src folder ' + 'or your installation of python-future is corrupted.') diff --git a/src/future/__init__.py b/src/future/__init__.py new file mode 100644 index 00000000..b097fd81 --- /dev/null +++ b/src/future/__init__.py @@ -0,0 +1,92 @@ +""" +future: Easy, safe support for Python 2/3 compatibility +======================================================= + +``future`` is the missing compatibility layer between Python 2 and Python +3. It allows you to use a single, clean Python 3.x-compatible codebase to +support both Python 2 and Python 3 with minimal overhead. + +It is designed to be used as follows:: + + from __future__ import (absolute_import, division, + print_function, unicode_literals) + from builtins import ( + bytes, dict, int, list, object, range, str, + ascii, chr, hex, input, next, oct, open, + pow, round, super, + filter, map, zip) + +followed by predominantly standard, idiomatic Python 3 code that then runs +similarly on Python 2.6/2.7 and Python 3.3+. + +The imports have no effect on Python 3. On Python 2, they shadow the +corresponding builtins, which normally have different semantics on Python 3 +versus 2, to provide their Python 3 semantics. + + +Standard library reorganization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``future`` supports the standard library reorganization (PEP 3108) through the +following Py3 interfaces: + + >>> # Top-level packages with Py3 names provided on Py2: + >>> import html.parser + >>> import queue + >>> import tkinter.dialog + >>> import xmlrpc.client + >>> # etc. 
+
+    >>> # Aliases provided for extensions to existing Py2 module names:
+    >>> from future.standard_library import install_aliases
+    >>> install_aliases()
+
+    >>> from collections import Counter, OrderedDict   # backported to Py2.6
+    >>> from collections import UserDict, UserList, UserString
+    >>> import urllib.request
+    >>> from itertools import filterfalse, zip_longest
+    >>> from subprocess import getoutput, getstatusoutput
+
+
+Automatic conversion
+--------------------
+
+An included script called `futurize
+<http://python-future.org/automatic_conversion.html>`_ aids in converting
+code (from either Python 2 or Python 3) to code compatible with both
+platforms. It is similar to ``python-modernize`` but goes further in
+providing Python 3 compatibility through the use of the backported types
+and builtin functions in ``future``.
+
+
+Documentation
+-------------
+
+See: https://python-future.org
+
+
+Credits
+-------
+
+:Author:  Ed Schofield, Jordan M. Adler, et al
+:Sponsor: Python Charmers: https://pythoncharmers.com
+:Others:  See docs/credits.rst or https://python-future.org/credits.html
+
+
+Licensing
+---------
+Copyright 2013-2024 Python Charmers, Australia.
+The software is distributed under an MIT licence. See LICENSE.txt.
+ +""" + +__title__ = 'future' +__author__ = 'Ed Schofield' +__license__ = 'MIT' +__copyright__ = 'Copyright 2013-2024 Python Charmers (https://pythoncharmers.com)' +__ver_major__ = 1 +__ver_minor__ = 0 +__ver_patch__ = 0 +__ver_sub__ = '' +__version__ = "%d.%d.%d%s" % (__ver_major__, __ver_minor__, + __ver_patch__, __ver_sub__) diff --git a/src/future/backports/__init__.py b/src/future/backports/__init__.py new file mode 100644 index 00000000..c71e0653 --- /dev/null +++ b/src/future/backports/__init__.py @@ -0,0 +1,26 @@ +""" +future.backports package +""" + +from __future__ import absolute_import + +import sys + +__future_module__ = True +from future.standard_library import import_top_level_modules + + +if sys.version_info[0] >= 3: + import_top_level_modules() + + +from .misc import (ceil, + OrderedDict, + Counter, + ChainMap, + check_output, + count, + recursive_repr, + _count_elements, + cmp_to_key + ) diff --git a/future/standard_library/_markupbase.py b/src/future/backports/_markupbase.py similarity index 100% rename from future/standard_library/_markupbase.py rename to src/future/backports/_markupbase.py diff --git a/src/future/backports/datetime.py b/src/future/backports/datetime.py new file mode 100644 index 00000000..8cd62ddf --- /dev/null +++ b/src/future/backports/datetime.py @@ -0,0 +1,2152 @@ +"""Concrete date/time and related types. + +See http://www.iana.org/time-zones/repository/tz-link.html for +time zone and DST data sources. 
+""" +from __future__ import division +from __future__ import unicode_literals +from __future__ import print_function +from __future__ import absolute_import +from future.builtins import str +from future.builtins import bytes +from future.builtins import map +from future.builtins import round +from future.builtins import int +from future.builtins import object +from future.utils import native_str, PY2 + +import time as _time +import math as _math + +def _cmp(x, y): + return 0 if x == y else 1 if x > y else -1 + +MINYEAR = 1 +MAXYEAR = 9999 +_MAXORDINAL = 3652059 # date.max.toordinal() + +# Utility functions, adapted from Python's Demo/classes/Dates.py, which +# also assumes the current Gregorian calendar indefinitely extended in +# both directions. Difference: Dates.py calls January 1 of year 0 day +# number 1. The code here calls January 1 of year 1 day number 1. This is +# to match the definition of the "proleptic Gregorian" calendar in Dershowitz +# and Reingold's "Calendrical Calculations", where it's the base calendar +# for all computations. See the book for algorithms for converting between +# proleptic Gregorian ordinals and many other calendar systems. + +_DAYS_IN_MONTH = [None, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] + +_DAYS_BEFORE_MONTH = [None] +dbm = 0 +for dim in _DAYS_IN_MONTH[1:]: + _DAYS_BEFORE_MONTH.append(dbm) + dbm += dim +del dbm, dim + +def _is_leap(year): + "year -> 1 if leap year, else 0." + return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0) + +def _days_before_year(year): + "year -> number of days before January 1st of year." + y = year - 1 + return y*365 + y//4 - y//100 + y//400 + +def _days_in_month(year, month): + "year, month -> number of days in that month in that year." + assert 1 <= month <= 12, month + if month == 2 and _is_leap(year): + return 29 + return _DAYS_IN_MONTH[month] + +def _days_before_month(year, month): + "year, month -> number of days in year preceding first day of month." 
+ assert 1 <= month <= 12, 'month must be in 1..12' + return _DAYS_BEFORE_MONTH[month] + (month > 2 and _is_leap(year)) + +def _ymd2ord(year, month, day): + "year, month, day -> ordinal, considering 01-Jan-0001 as day 1." + assert 1 <= month <= 12, 'month must be in 1..12' + dim = _days_in_month(year, month) + assert 1 <= day <= dim, ('day must be in 1..%d' % dim) + return (_days_before_year(year) + + _days_before_month(year, month) + + day) + +_DI400Y = _days_before_year(401) # number of days in 400 years +_DI100Y = _days_before_year(101) # " " " " 100 " +_DI4Y = _days_before_year(5) # " " " " 4 " + +# A 4-year cycle has an extra leap day over what we'd get from pasting +# together 4 single years. +assert _DI4Y == 4 * 365 + 1 + +# Similarly, a 400-year cycle has an extra leap day over what we'd get from +# pasting together 4 100-year cycles. +assert _DI400Y == 4 * _DI100Y + 1 + +# OTOH, a 100-year cycle has one fewer leap day than we'd get from +# pasting together 25 4-year cycles. +assert _DI100Y == 25 * _DI4Y - 1 + +def _ord2ymd(n): + "ordinal -> (year, month, day), considering 01-Jan-0001 as day 1." + + # n is a 1-based index, starting at 1-Jan-1. The pattern of leap years + # repeats exactly every 400 years. The basic strategy is to find the + # closest 400-year boundary at or before n, then work with the offset + # from that boundary to n. Life is much clearer if we subtract 1 from + # n first -- then the values of n at 400-year boundaries are exactly + # those divisible by _DI400Y: + # + # D M Y n n-1 + # -- --- ---- ---------- ---------------- + # 31 Dec -400 -_DI400Y -_DI400Y -1 + # 1 Jan -399 -_DI400Y +1 -_DI400Y 400-year boundary + # ... + # 30 Dec 000 -1 -2 + # 31 Dec 000 0 -1 + # 1 Jan 001 1 0 400-year boundary + # 2 Jan 001 2 1 + # 3 Jan 001 3 2 + # ... + # 31 Dec 400 _DI400Y _DI400Y -1 + # 1 Jan 401 _DI400Y +1 _DI400Y 400-year boundary + n -= 1 + n400, n = divmod(n, _DI400Y) + year = n400 * 400 + 1 # ..., -399, 1, 401, ... 
+ + # Now n is the (non-negative) offset, in days, from January 1 of year, to + # the desired date. Now compute how many 100-year cycles precede n. + # Note that it's possible for n100 to equal 4! In that case 4 full + # 100-year cycles precede the desired day, which implies the desired + # day is December 31 at the end of a 400-year cycle. + n100, n = divmod(n, _DI100Y) + + # Now compute how many 4-year cycles precede it. + n4, n = divmod(n, _DI4Y) + + # And now how many single years. Again n1 can be 4, and again meaning + # that the desired day is December 31 at the end of the 4-year cycle. + n1, n = divmod(n, 365) + + year += n100 * 100 + n4 * 4 + n1 + if n1 == 4 or n100 == 4: + assert n == 0 + return year-1, 12, 31 + + # Now the year is correct, and n is the offset from January 1. We find + # the month via an estimate that's either exact or one too large. + leapyear = n1 == 3 and (n4 != 24 or n100 == 3) + assert leapyear == _is_leap(year) + month = (n + 50) >> 5 + preceding = _DAYS_BEFORE_MONTH[month] + (month > 2 and leapyear) + if preceding > n: # estimate is too large + month -= 1 + preceding -= _DAYS_IN_MONTH[month] + (month == 2 and leapyear) + n -= preceding + assert 0 <= n < _days_in_month(year, month) + + # Now the year and month are correct, and n is the offset from the + # start of that month: we're done! + return year, month, n+1 + +# Month and day names. For localized versions, see the calendar module. +_MONTHNAMES = [None, "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] +_DAYNAMES = [None, "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] + + +def _build_struct_time(y, m, d, hh, mm, ss, dstflag): + wday = (_ymd2ord(y, m, d) + 6) % 7 + dnum = _days_before_month(y, m) + d + return _time.struct_time((y, m, d, hh, mm, ss, wday, dnum, dstflag)) + +def _format_time(hh, mm, ss, us): + # Skip trailing microseconds when us==0. 
+ result = "%02d:%02d:%02d" % (hh, mm, ss) + if us: + result += ".%06d" % us + return result + +# Correctly substitute for %z and %Z escapes in strftime formats. +def _wrap_strftime(object, format, timetuple): + # Don't call utcoffset() or tzname() unless actually needed. + freplace = None # the string to use for %f + zreplace = None # the string to use for %z + Zreplace = None # the string to use for %Z + + # Scan format for %z and %Z escapes, replacing as needed. + newformat = [] + push = newformat.append + i, n = 0, len(format) + while i < n: + ch = format[i] + i += 1 + if ch == '%': + if i < n: + ch = format[i] + i += 1 + if ch == 'f': + if freplace is None: + freplace = '%06d' % getattr(object, + 'microsecond', 0) + newformat.append(freplace) + elif ch == 'z': + if zreplace is None: + zreplace = "" + if hasattr(object, "utcoffset"): + offset = object.utcoffset() + if offset is not None: + sign = '+' + if offset.days < 0: + offset = -offset + sign = '-' + h, m = divmod(offset, timedelta(hours=1)) + assert not m % timedelta(minutes=1), "whole minute" + m //= timedelta(minutes=1) + zreplace = '%c%02d%02d' % (sign, h, m) + assert '%' not in zreplace + newformat.append(zreplace) + elif ch == 'Z': + if Zreplace is None: + Zreplace = "" + if hasattr(object, "tzname"): + s = object.tzname() + if s is not None: + # strftime is going to have at this: escape % + Zreplace = s.replace('%', '%%') + newformat.append(Zreplace) + else: + push('%') + push(ch) + else: + push('%') + else: + push(ch) + newformat = "".join(newformat) + return _time.strftime(newformat, timetuple) + +def _call_tzinfo_method(tzinfo, methname, tzinfoarg): + if tzinfo is None: + return None + return getattr(tzinfo, methname)(tzinfoarg) + +# Just raise TypeError if the arg isn't None or a string. 
+def _check_tzname(name):
+    if name is not None and not isinstance(name, str):
+        raise TypeError("tzinfo.tzname() must return None or string, "
+                        "not '%s'" % type(name))
+
+# name is the offset-producing method, "utcoffset" or "dst".
+# offset is what it returned.
+# If offset isn't None or timedelta, raises TypeError.
+# If offset is None, returns None.
+# Else offset is checked for being in range, and a whole # of minutes.
+# If it is, its integer value is returned.  Else ValueError is raised.
+def _check_utc_offset(name, offset):
+    assert name in ("utcoffset", "dst")
+    if offset is None:
+        return
+    if not isinstance(offset, timedelta):
+        raise TypeError("tzinfo.%s() must return None "
+                        "or timedelta, not '%s'" % (name, type(offset)))
+    if offset % timedelta(minutes=1) or offset.microseconds:
+        raise ValueError("tzinfo.%s() must return a whole number "
+                         "of minutes, got %s" % (name, offset))
+    if not -timedelta(1) < offset < timedelta(1):
+        raise ValueError("%s()=%s, must be strictly between"
+                         " -timedelta(hours=24) and timedelta(hours=24)"
+                         % (name, offset))
+
+def _check_date_fields(year, month, day):
+    if not isinstance(year, int):
+        raise TypeError('int expected')
+    if not MINYEAR <= year <= MAXYEAR:
+        raise ValueError('year must be in %d..%d' % (MINYEAR, MAXYEAR), year)
+    if not 1 <= month <= 12:
+        raise ValueError('month must be in 1..12', month)
+    dim = _days_in_month(year, month)
+    if not 1 <= day <= dim:
+        raise ValueError('day must be in 1..%d' % dim, day)
+
+def _check_time_fields(hour, minute, second, microsecond):
+    if not isinstance(hour, int):
+        raise TypeError('int expected')
+    if not 0 <= hour <= 23:
+        raise ValueError('hour must be in 0..23', hour)
+    if not 0 <= minute <= 59:
+        raise ValueError('minute must be in 0..59', minute)
+    if not 0 <= second <= 59:
+        raise ValueError('second must be in 0..59', second)
+    if not 0 <= microsecond <= 999999:
+        raise ValueError('microsecond must be in 0..999999', microsecond)
+
+def _check_tzinfo_arg(tz):
+    if tz is not None and not isinstance(tz, tzinfo):
+        raise TypeError("tzinfo argument must be None or of a tzinfo subclass")
+
+def _cmperror(x, y):
+    raise TypeError("can't compare '%s' to '%s'" % (
+                    type(x).__name__, type(y).__name__))
+
+class timedelta(object):
+    """Represent the difference between two datetime objects.
+
+    Supported operators:
+
+    - add, subtract timedelta
+    - unary plus, minus, abs
+    - compare to timedelta
+    - multiply, divide by int
+
+    In addition, datetime supports subtraction of two datetime objects
+    returning a timedelta, and addition or subtraction of a datetime
+    and a timedelta giving a datetime.
+
+    Representation: (days, seconds, microseconds).  Why?  Because I
+    felt like it.
+    """
+    __slots__ = '_days', '_seconds', '_microseconds'
+
+    def __new__(cls, days=0, seconds=0, microseconds=0,
+                milliseconds=0, minutes=0, hours=0, weeks=0):
+        # Doing this efficiently and accurately in C is going to be difficult
+        # and error-prone, due to ubiquitous overflow possibilities, and that
+        # C double doesn't have enough bits of precision to represent
+        # microseconds over 10K years faithfully.  The code here tries to make
+        # explicit where go-fast assumptions can be relied on, in order to
+        # guide the C implementation; it's way more convoluted than speed-
+        # ignoring auto-overflow-to-long idiomatic Python could be.
+
+        # XXX Check that all inputs are ints or floats.
+
+        # Final values, all integer.
+        # s and us fit in 32-bit signed ints; d isn't bounded.
+        d = s = us = 0
+
+        # Normalize everything to days, seconds, microseconds.
+        days += weeks*7
+        seconds += minutes*60 + hours*3600
+        microseconds += milliseconds*1000
+
+        # Get rid of all fractions, and normalize s and us.
+        # Take a deep breath <wink>.
+ if isinstance(days, float): + dayfrac, days = _math.modf(days) + daysecondsfrac, daysecondswhole = _math.modf(dayfrac * (24.*3600.)) + assert daysecondswhole == int(daysecondswhole) # can't overflow + s = int(daysecondswhole) + assert days == int(days) + d = int(days) + else: + daysecondsfrac = 0.0 + d = days + assert isinstance(daysecondsfrac, float) + assert abs(daysecondsfrac) <= 1.0 + assert isinstance(d, int) + assert abs(s) <= 24 * 3600 + # days isn't referenced again before redefinition + + if isinstance(seconds, float): + secondsfrac, seconds = _math.modf(seconds) + assert seconds == int(seconds) + seconds = int(seconds) + secondsfrac += daysecondsfrac + assert abs(secondsfrac) <= 2.0 + else: + secondsfrac = daysecondsfrac + # daysecondsfrac isn't referenced again + assert isinstance(secondsfrac, float) + assert abs(secondsfrac) <= 2.0 + + assert isinstance(seconds, int) + days, seconds = divmod(seconds, 24*3600) + d += days + s += int(seconds) # can't overflow + assert isinstance(s, int) + assert abs(s) <= 2 * 24 * 3600 + # seconds isn't referenced again before redefinition + + usdouble = secondsfrac * 1e6 + assert abs(usdouble) < 2.1e6 # exact value not critical + # secondsfrac isn't referenced again + + if isinstance(microseconds, float): + microseconds += usdouble + microseconds = round(microseconds, 0) + seconds, microseconds = divmod(microseconds, 1e6) + assert microseconds == int(microseconds) + assert seconds == int(seconds) + days, seconds = divmod(seconds, 24.*3600.) 
+ assert days == int(days) + assert seconds == int(seconds) + d += int(days) + s += int(seconds) # can't overflow + assert isinstance(s, int) + assert abs(s) <= 3 * 24 * 3600 + else: + seconds, microseconds = divmod(microseconds, 1000000) + days, seconds = divmod(seconds, 24*3600) + d += days + s += int(seconds) # can't overflow + assert isinstance(s, int) + assert abs(s) <= 3 * 24 * 3600 + microseconds = float(microseconds) + microseconds += usdouble + microseconds = round(microseconds, 0) + assert abs(s) <= 3 * 24 * 3600 + assert abs(microseconds) < 3.1e6 + + # Just a little bit of carrying possible for microseconds and seconds. + assert isinstance(microseconds, float) + assert int(microseconds) == microseconds + us = int(microseconds) + seconds, us = divmod(us, 1000000) + s += seconds # cant't overflow + assert isinstance(s, int) + days, s = divmod(s, 24*3600) + d += days + + assert isinstance(d, int) + assert isinstance(s, int) and 0 <= s < 24*3600 + assert isinstance(us, int) and 0 <= us < 1000000 + + self = object.__new__(cls) + + self._days = d + self._seconds = s + self._microseconds = us + if abs(d) > 999999999: + raise OverflowError("timedelta # of days is too large: %d" % d) + + return self + + def __repr__(self): + if self._microseconds: + return "%s(%d, %d, %d)" % ('datetime.' + self.__class__.__name__, + self._days, + self._seconds, + self._microseconds) + if self._seconds: + return "%s(%d, %d)" % ('datetime.' + self.__class__.__name__, + self._days, + self._seconds) + return "%s(%d)" % ('datetime.' 
+ self.__class__.__name__, self._days) + + def __str__(self): + mm, ss = divmod(self._seconds, 60) + hh, mm = divmod(mm, 60) + s = "%d:%02d:%02d" % (hh, mm, ss) + if self._days: + def plural(n): + return n, abs(n) != 1 and "s" or "" + s = ("%d day%s, " % plural(self._days)) + s + if self._microseconds: + s = s + ".%06d" % self._microseconds + return s + + def total_seconds(self): + """Total seconds in the duration.""" + return ((self.days * 86400 + self.seconds)*10**6 + + self.microseconds) / 10**6 + + # Read-only field accessors + @property + def days(self): + """days""" + return self._days + + @property + def seconds(self): + """seconds""" + return self._seconds + + @property + def microseconds(self): + """microseconds""" + return self._microseconds + + def __add__(self, other): + if isinstance(other, timedelta): + # for CPython compatibility, we cannot use + # our __class__ here, but need a real timedelta + return timedelta(self._days + other._days, + self._seconds + other._seconds, + self._microseconds + other._microseconds) + return NotImplemented + + __radd__ = __add__ + + def __sub__(self, other): + if isinstance(other, timedelta): + # for CPython compatibility, we cannot use + # our __class__ here, but need a real timedelta + return timedelta(self._days - other._days, + self._seconds - other._seconds, + self._microseconds - other._microseconds) + return NotImplemented + + def __rsub__(self, other): + if isinstance(other, timedelta): + return -self + other + return NotImplemented + + def __neg__(self): + # for CPython compatibility, we cannot use + # our __class__ here, but need a real timedelta + return timedelta(-self._days, + -self._seconds, + -self._microseconds) + + def __pos__(self): + return self + + def __abs__(self): + if self._days < 0: + return -self + else: + return self + + def __mul__(self, other): + if isinstance(other, int): + # for CPython compatibility, we cannot use + # our __class__ here, but need a real timedelta + return 
timedelta(self._days * other, + self._seconds * other, + self._microseconds * other) + if isinstance(other, float): + a, b = other.as_integer_ratio() + return self * a / b + return NotImplemented + + __rmul__ = __mul__ + + def _to_microseconds(self): + return ((self._days * (24*3600) + self._seconds) * 1000000 + + self._microseconds) + + def __floordiv__(self, other): + if not isinstance(other, (int, timedelta)): + return NotImplemented + usec = self._to_microseconds() + if isinstance(other, timedelta): + return usec // other._to_microseconds() + if isinstance(other, int): + return timedelta(0, 0, usec // other) + + def __truediv__(self, other): + if not isinstance(other, (int, float, timedelta)): + return NotImplemented + usec = self._to_microseconds() + if isinstance(other, timedelta): + return usec / other._to_microseconds() + if isinstance(other, int): + return timedelta(0, 0, usec / other) + if isinstance(other, float): + a, b = other.as_integer_ratio() + return timedelta(0, 0, b * usec / a) + + def __mod__(self, other): + if isinstance(other, timedelta): + r = self._to_microseconds() % other._to_microseconds() + return timedelta(0, 0, r) + return NotImplemented + + def __divmod__(self, other): + if isinstance(other, timedelta): + q, r = divmod(self._to_microseconds(), + other._to_microseconds()) + return q, timedelta(0, 0, r) + return NotImplemented + + # Comparisons of timedelta objects with other. 
+ + def __eq__(self, other): + if isinstance(other, timedelta): + return self._cmp(other) == 0 + else: + return False + + def __ne__(self, other): + if isinstance(other, timedelta): + return self._cmp(other) != 0 + else: + return True + + def __le__(self, other): + if isinstance(other, timedelta): + return self._cmp(other) <= 0 + else: + _cmperror(self, other) + + def __lt__(self, other): + if isinstance(other, timedelta): + return self._cmp(other) < 0 + else: + _cmperror(self, other) + + def __ge__(self, other): + if isinstance(other, timedelta): + return self._cmp(other) >= 0 + else: + _cmperror(self, other) + + def __gt__(self, other): + if isinstance(other, timedelta): + return self._cmp(other) > 0 + else: + _cmperror(self, other) + + def _cmp(self, other): + assert isinstance(other, timedelta) + return _cmp(self._getstate(), other._getstate()) + + def __hash__(self): + return hash(self._getstate()) + + def __bool__(self): + return (self._days != 0 or + self._seconds != 0 or + self._microseconds != 0) + + # Pickle support. + + def _getstate(self): + return (self._days, self._seconds, self._microseconds) + + def __reduce__(self): + return (self.__class__, self._getstate()) + +timedelta.min = timedelta(-999999999) +timedelta.max = timedelta(days=999999999, hours=23, minutes=59, seconds=59, + microseconds=999999) +timedelta.resolution = timedelta(microseconds=1) + +class date(object): + """Concrete date type. + + Constructors: + + __new__() + fromtimestamp() + today() + fromordinal() + + Operators: + + __repr__, __str__ + __cmp__, __hash__ + __add__, __radd__, __sub__ (add/radd only with timedelta arg) + + Methods: + + timetuple() + toordinal() + weekday() + isoweekday(), isocalendar(), isoformat() + ctime() + strftime() + + Properties (readonly): + year, month, day + """ + __slots__ = '_year', '_month', '_day' + + def __new__(cls, year, month=None, day=None): + """Constructor. 
+ + Arguments: + + year, month, day (required, base 1) + """ + if (isinstance(year, bytes) and len(year) == 4 and + 1 <= year[2] <= 12 and month is None): # Month is sane + # Pickle support + self = object.__new__(cls) + self.__setstate(year) + return self + _check_date_fields(year, month, day) + self = object.__new__(cls) + self._year = year + self._month = month + self._day = day + return self + + # Additional constructors + + @classmethod + def fromtimestamp(cls, t): + "Construct a date from a POSIX timestamp (like time.time())." + y, m, d, hh, mm, ss, weekday, jday, dst = _time.localtime(t) + return cls(y, m, d) + + @classmethod + def today(cls): + "Construct a date from time.time()." + t = _time.time() + return cls.fromtimestamp(t) + + @classmethod + def fromordinal(cls, n): + """Construct a date from a proleptic Gregorian ordinal. + + January 1 of year 1 is day 1. Only the year, month and day are + non-zero in the result. + """ + y, m, d = _ord2ymd(n) + return cls(y, m, d) + + # Conversions to string + + def __repr__(self): + """Convert to formal string, for repr(). + + >>> dt = datetime(2010, 1, 1) + >>> repr(dt) + 'datetime.datetime(2010, 1, 1, 0, 0)' + + >>> dt = datetime(2010, 1, 1, tzinfo=timezone.utc) + >>> repr(dt) + 'datetime.datetime(2010, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)' + """ + return "%s(%d, %d, %d)" % ('datetime.' + self.__class__.__name__, + self._year, + self._month, + self._day) + # XXX These shouldn't depend on time.localtime(), because that + # clips the usable dates to [1970 .. 2038). At least ctime() is + # easily done without using strftime() -- that's better too because + # strftime("%c", ...) is locale specific. + + + def ctime(self): + "Return ctime() style string." + weekday = self.toordinal() % 7 or 7 + return "%s %s %2d 00:00:00 %04d" % ( + _DAYNAMES[weekday], + _MONTHNAMES[self._month], + self._day, self._year) + + def strftime(self, fmt): + "Format using strftime()." 
+ return _wrap_strftime(self, fmt, self.timetuple()) + + def __format__(self, fmt): + if len(fmt) != 0: + return self.strftime(fmt) + return str(self) + + def isoformat(self): + """Return the date formatted according to ISO. + + This is 'YYYY-MM-DD'. + + References: + - http://www.w3.org/TR/NOTE-datetime + - http://www.cl.cam.ac.uk/~mgk25/iso-time.html + """ + return "%04d-%02d-%02d" % (self._year, self._month, self._day) + + __str__ = isoformat + + # Read-only field accessors + @property + def year(self): + """year (1-9999)""" + return self._year + + @property + def month(self): + """month (1-12)""" + return self._month + + @property + def day(self): + """day (1-31)""" + return self._day + + # Standard conversions, __cmp__, __hash__ (and helpers) + + def timetuple(self): + "Return local time tuple compatible with time.localtime()." + return _build_struct_time(self._year, self._month, self._day, + 0, 0, 0, -1) + + def toordinal(self): + """Return proleptic Gregorian ordinal for the year, month and day. + + January 1 of year 1 is day 1. Only the year, month and day values + contribute to the result. + """ + return _ymd2ord(self._year, self._month, self._day) + + def replace(self, year=None, month=None, day=None): + """Return a new date with new values for the specified fields.""" + if year is None: + year = self._year + if month is None: + month = self._month + if day is None: + day = self._day + _check_date_fields(year, month, day) + return date(year, month, day) + + # Comparisons of date objects with other. 
+ + def __eq__(self, other): + if isinstance(other, date): + return self._cmp(other) == 0 + return NotImplemented + + def __ne__(self, other): + if isinstance(other, date): + return self._cmp(other) != 0 + return NotImplemented + + def __le__(self, other): + if isinstance(other, date): + return self._cmp(other) <= 0 + return NotImplemented + + def __lt__(self, other): + if isinstance(other, date): + return self._cmp(other) < 0 + return NotImplemented + + def __ge__(self, other): + if isinstance(other, date): + return self._cmp(other) >= 0 + return NotImplemented + + def __gt__(self, other): + if isinstance(other, date): + return self._cmp(other) > 0 + return NotImplemented + + def _cmp(self, other): + assert isinstance(other, date) + y, m, d = self._year, self._month, self._day + y2, m2, d2 = other._year, other._month, other._day + return _cmp((y, m, d), (y2, m2, d2)) + + def __hash__(self): + "Hash." + return hash(self._getstate()) + + # Computations + + def __add__(self, other): + "Add a date to a timedelta." + if isinstance(other, timedelta): + o = self.toordinal() + other.days + if 0 < o <= _MAXORDINAL: + return date.fromordinal(o) + raise OverflowError("result out of range") + return NotImplemented + + __radd__ = __add__ + + def __sub__(self, other): + """Subtract two dates, or a date and a timedelta.""" + if isinstance(other, timedelta): + return self + timedelta(-other.days) + if isinstance(other, date): + days1 = self.toordinal() + days2 = other.toordinal() + return timedelta(days1 - days2) + return NotImplemented + + def weekday(self): + "Return day of the week, where Monday == 0 ... Sunday == 6." + return (self.toordinal() + 6) % 7 + + # Day-of-the-week and week-of-the-year, according to ISO + + def isoweekday(self): + "Return day of the week, where Monday == 1 ... Sunday == 7." + # 1-Jan-0001 is a Monday + return self.toordinal() % 7 or 7 + + def isocalendar(self): + """Return a 3-tuple containing ISO year, week number, and weekday. 
+ + The first ISO week of the year is the (Mon-Sun) week + containing the year's first Thursday; everything else derives + from that. + + The first week is 1; Monday is 1 ... Sunday is 7. + + ISO calendar algorithm taken from + http://www.phys.uu.nl/~vgent/calendar/isocalendar.htm + """ + year = self._year + week1monday = _isoweek1monday(year) + today = _ymd2ord(self._year, self._month, self._day) + # Internally, week and day have origin 0 + week, day = divmod(today - week1monday, 7) + if week < 0: + year -= 1 + week1monday = _isoweek1monday(year) + week, day = divmod(today - week1monday, 7) + elif week >= 52: + if today >= _isoweek1monday(year+1): + year += 1 + week = 0 + return year, week+1, day+1 + + # Pickle support. + + def _getstate(self): + yhi, ylo = divmod(self._year, 256) + return bytes([yhi, ylo, self._month, self._day]), + + def __setstate(self, string): + if len(string) != 4 or not (1 <= string[2] <= 12): + raise TypeError("not enough arguments") + yhi, ylo, self._month, self._day = string + self._year = yhi * 256 + ylo + + def __reduce__(self): + return (self.__class__, self._getstate()) + +_date_class = date # so functions w/ args named "date" can get at the class + +date.min = date(1, 1, 1) +date.max = date(9999, 12, 31) +date.resolution = timedelta(days=1) + +class tzinfo(object): + """Abstract base class for time zone info classes. + + Subclasses must override the name(), utcoffset() and dst() methods. + """ + __slots__ = () + def tzname(self, dt): + "datetime -> string name of time zone." + raise NotImplementedError("tzinfo subclass must override tzname()") + + def utcoffset(self, dt): + "datetime -> minutes east of UTC (negative for west of UTC)" + raise NotImplementedError("tzinfo subclass must override utcoffset()") + + def dst(self, dt): + """datetime -> DST offset in minutes east of UTC. + + Return 0 if DST not in effect. utcoffset() must include the DST + offset. 
+ """ + raise NotImplementedError("tzinfo subclass must override dst()") + + def fromutc(self, dt): + "datetime in UTC -> datetime in local time." + + if not isinstance(dt, datetime): + raise TypeError("fromutc() requires a datetime argument") + if dt.tzinfo is not self: + raise ValueError("dt.tzinfo is not self") + + dtoff = dt.utcoffset() + if dtoff is None: + raise ValueError("fromutc() requires a non-None utcoffset() " + "result") + + # See the long comment block at the end of this file for an + # explanation of this algorithm. + dtdst = dt.dst() + if dtdst is None: + raise ValueError("fromutc() requires a non-None dst() result") + delta = dtoff - dtdst + if delta: + dt += delta + dtdst = dt.dst() + if dtdst is None: + raise ValueError("fromutc(): dt.dst gave inconsistent " + "results; cannot convert") + return dt + dtdst + + # Pickle support. + + def __reduce__(self): + getinitargs = getattr(self, "__getinitargs__", None) + if getinitargs: + args = getinitargs() + else: + args = () + getstate = getattr(self, "__getstate__", None) + if getstate: + state = getstate() + else: + state = getattr(self, "__dict__", None) or None + if state is None: + return (self.__class__, args) + else: + return (self.__class__, args, state) + +_tzinfo_class = tzinfo + +class time(object): + """Time with time zone. + + Constructors: + + __new__() + + Operators: + + __repr__, __str__ + __cmp__, __hash__ + + Methods: + + strftime() + isoformat() + utcoffset() + tzname() + dst() + + Properties (readonly): + hour, minute, second, microsecond, tzinfo + """ + + def __new__(cls, hour=0, minute=0, second=0, microsecond=0, tzinfo=None): + """Constructor. 
+ + Arguments: + + hour, minute (required) + second, microsecond (default to zero) + tzinfo (default to None) + """ + self = object.__new__(cls) + if isinstance(hour, bytes) and len(hour) == 6: + # Pickle support + self.__setstate(hour, minute or None) + return self + _check_tzinfo_arg(tzinfo) + _check_time_fields(hour, minute, second, microsecond) + self._hour = hour + self._minute = minute + self._second = second + self._microsecond = microsecond + self._tzinfo = tzinfo + return self + + # Read-only field accessors + @property + def hour(self): + """hour (0-23)""" + return self._hour + + @property + def minute(self): + """minute (0-59)""" + return self._minute + + @property + def second(self): + """second (0-59)""" + return self._second + + @property + def microsecond(self): + """microsecond (0-999999)""" + return self._microsecond + + @property + def tzinfo(self): + """timezone info object""" + return self._tzinfo + + # Standard conversions, __hash__ (and helpers) + + # Comparisons of time objects with other. 
+ + def __eq__(self, other): + if isinstance(other, time): + return self._cmp(other, allow_mixed=True) == 0 + else: + return False + + def __ne__(self, other): + if isinstance(other, time): + return self._cmp(other, allow_mixed=True) != 0 + else: + return True + + def __le__(self, other): + if isinstance(other, time): + return self._cmp(other) <= 0 + else: + _cmperror(self, other) + + def __lt__(self, other): + if isinstance(other, time): + return self._cmp(other) < 0 + else: + _cmperror(self, other) + + def __ge__(self, other): + if isinstance(other, time): + return self._cmp(other) >= 0 + else: + _cmperror(self, other) + + def __gt__(self, other): + if isinstance(other, time): + return self._cmp(other) > 0 + else: + _cmperror(self, other) + + def _cmp(self, other, allow_mixed=False): + assert isinstance(other, time) + mytz = self._tzinfo + ottz = other._tzinfo + myoff = otoff = None + + if mytz is ottz: + base_compare = True + else: + myoff = self.utcoffset() + otoff = other.utcoffset() + base_compare = myoff == otoff + + if base_compare: + return _cmp((self._hour, self._minute, self._second, + self._microsecond), + (other._hour, other._minute, other._second, + other._microsecond)) + if myoff is None or otoff is None: + if allow_mixed: + return 2 # arbitrary non-zero value + else: + raise TypeError("cannot compare naive and aware times") + myhhmm = self._hour * 60 + self._minute - myoff//timedelta(minutes=1) + othhmm = other._hour * 60 + other._minute - otoff//timedelta(minutes=1) + return _cmp((myhhmm, self._second, self._microsecond), + (othhmm, other._second, other._microsecond)) + + def __hash__(self): + """Hash.""" + tzoff = self.utcoffset() + if not tzoff: # zero or None + return hash(self._getstate()[0]) + h, m = divmod(timedelta(hours=self.hour, minutes=self.minute) - tzoff, + timedelta(hours=1)) + assert not m % timedelta(minutes=1), "whole minute" + m //= timedelta(minutes=1) + if 0 <= h < 24: + return hash(time(h, m, self.second, self.microsecond)) + 
return hash((h, m, self.second, self.microsecond)) + + # Conversion to string + + def _tzstr(self, sep=":"): + """Return formatted timezone offset (+xx:xx) or None.""" + off = self.utcoffset() + if off is not None: + if off.days < 0: + sign = "-" + off = -off + else: + sign = "+" + hh, mm = divmod(off, timedelta(hours=1)) + assert not mm % timedelta(minutes=1), "whole minute" + mm //= timedelta(minutes=1) + assert 0 <= hh < 24 + off = "%s%02d%s%02d" % (sign, hh, sep, mm) + return off + + def __repr__(self): + """Convert to formal string, for repr().""" + if self._microsecond != 0: + s = ", %d, %d" % (self._second, self._microsecond) + elif self._second != 0: + s = ", %d" % self._second + else: + s = "" + s= "%s(%d, %d%s)" % ('datetime.' + self.__class__.__name__, + self._hour, self._minute, s) + if self._tzinfo is not None: + assert s[-1:] == ")" + s = s[:-1] + ", tzinfo=%r" % self._tzinfo + ")" + return s + + def isoformat(self): + """Return the time formatted according to ISO. + + This is 'HH:MM:SS.mmmmmm+zz:zz', or 'HH:MM:SS+zz:zz' if + self.microsecond == 0. + """ + s = _format_time(self._hour, self._minute, self._second, + self._microsecond) + tz = self._tzstr() + if tz: + s += tz + return s + + __str__ = isoformat + + def strftime(self, fmt): + """Format using strftime(). The date part of the timestamp passed + to underlying strftime should not be used. + """ + # The year must be >= 1000 else Python's strftime implementation + # can raise a bogus exception. 
+ timetuple = (1900, 1, 1, + self._hour, self._minute, self._second, + 0, 1, -1) + return _wrap_strftime(self, fmt, timetuple) + + def __format__(self, fmt): + if len(fmt) != 0: + return self.strftime(fmt) + return str(self) + + # Timezone functions + + def utcoffset(self): + """Return the timezone offset in minutes east of UTC (negative west of + UTC).""" + if self._tzinfo is None: + return None + offset = self._tzinfo.utcoffset(None) + _check_utc_offset("utcoffset", offset) + return offset + + def tzname(self): + """Return the timezone name. + + Note that the name is 100% informational -- there's no requirement that + it mean anything in particular. For example, "GMT", "UTC", "-500", + "-5:00", "EDT", "US/Eastern", "America/New York" are all valid replies. + """ + if self._tzinfo is None: + return None + name = self._tzinfo.tzname(None) + _check_tzname(name) + return name + + def dst(self): + """Return 0 if DST is not in effect, or the DST offset (in minutes + eastward) if DST is in effect. + + This is purely informational; the DST offset has already been added to + the UTC offset returned by utcoffset() if applicable, so there's no + need to consult dst() unless you're interested in displaying the DST + info. 
+ """ + if self._tzinfo is None: + return None + offset = self._tzinfo.dst(None) + _check_utc_offset("dst", offset) + return offset + + def replace(self, hour=None, minute=None, second=None, microsecond=None, + tzinfo=True): + """Return a new time with new values for the specified fields.""" + if hour is None: + hour = self.hour + if minute is None: + minute = self.minute + if second is None: + second = self.second + if microsecond is None: + microsecond = self.microsecond + if tzinfo is True: + tzinfo = self.tzinfo + _check_time_fields(hour, minute, second, microsecond) + _check_tzinfo_arg(tzinfo) + return time(hour, minute, second, microsecond, tzinfo) + + def __bool__(self): + if self.second or self.microsecond: + return True + offset = self.utcoffset() or timedelta(0) + return timedelta(hours=self.hour, minutes=self.minute) != offset + + # Pickle support. + + def _getstate(self): + us2, us3 = divmod(self._microsecond, 256) + us1, us2 = divmod(us2, 256) + basestate = bytes([self._hour, self._minute, self._second, + us1, us2, us3]) + if self._tzinfo is None: + return (basestate,) + else: + return (basestate, self._tzinfo) + + def __setstate(self, string, tzinfo): + if len(string) != 6 or string[0] >= 24: + raise TypeError("an integer is required") + (self._hour, self._minute, self._second, + us1, us2, us3) = string + self._microsecond = (((us1 << 8) | us2) << 8) | us3 + if tzinfo is None or isinstance(tzinfo, _tzinfo_class): + self._tzinfo = tzinfo + else: + raise TypeError("bad tzinfo state arg %r" % tzinfo) + + def __reduce__(self): + return (time, self._getstate()) + +_time_class = time # so functions w/ args named "time" can get at the class + +time.min = time(0, 0, 0) +time.max = time(23, 59, 59, 999999) +time.resolution = timedelta(microseconds=1) + +class datetime(date): + """datetime(year, month, day[, hour[, minute[, second[, microsecond[,tzinfo]]]]]) + + The year, month and day arguments are required. 
tzinfo may be None, or an + instance of a tzinfo subclass. The remaining arguments may be ints. + """ + + __slots__ = date.__slots__ + ( + '_hour', '_minute', '_second', + '_microsecond', '_tzinfo') + def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0, + microsecond=0, tzinfo=None): + if isinstance(year, bytes) and len(year) == 10: + # Pickle support + self = date.__new__(cls, year[:4]) + self.__setstate(year, month) + return self + _check_tzinfo_arg(tzinfo) + _check_time_fields(hour, minute, second, microsecond) + self = date.__new__(cls, year, month, day) + self._hour = hour + self._minute = minute + self._second = second + self._microsecond = microsecond + self._tzinfo = tzinfo + return self + + # Read-only field accessors + @property + def hour(self): + """hour (0-23)""" + return self._hour + + @property + def minute(self): + """minute (0-59)""" + return self._minute + + @property + def second(self): + """second (0-59)""" + return self._second + + @property + def microsecond(self): + """microsecond (0-999999)""" + return self._microsecond + + @property + def tzinfo(self): + """timezone info object""" + return self._tzinfo + + @classmethod + def fromtimestamp(cls, t, tz=None): + """Construct a datetime from a POSIX timestamp (like time.time()). + + A timezone info object may be passed in as well. + """ + + _check_tzinfo_arg(tz) + + converter = _time.localtime if tz is None else _time.gmtime + + t, frac = divmod(t, 1.0) + us = int(frac * 1e6) + + # If timestamp is less than one microsecond smaller than a + # full second, us can be rounded up to 1000000. In this case, + # roll over to seconds, otherwise, ValueError is raised + # by the constructor. 
+ if us == 1000000: + t += 1 + us = 0 + y, m, d, hh, mm, ss, weekday, jday, dst = converter(t) + ss = min(ss, 59) # clamp out leap seconds if the platform has them + result = cls(y, m, d, hh, mm, ss, us, tz) + if tz is not None: + result = tz.fromutc(result) + return result + + @classmethod + def utcfromtimestamp(cls, t): + "Construct a UTC datetime from a POSIX timestamp (like time.time())." + t, frac = divmod(t, 1.0) + us = int(frac * 1e6) + + # If timestamp is less than one microsecond smaller than a + # full second, us can be rounded up to 1000000. In this case, + # roll over to seconds, otherwise, ValueError is raised + # by the constructor. + if us == 1000000: + t += 1 + us = 0 + y, m, d, hh, mm, ss, weekday, jday, dst = _time.gmtime(t) + ss = min(ss, 59) # clamp out leap seconds if the platform has them + return cls(y, m, d, hh, mm, ss, us) + + # XXX This is supposed to do better than we *can* do by using time.time(), + # XXX if the platform supports a more accurate way. The C implementation + # XXX uses gettimeofday on platforms that have it, but that isn't + # XXX available from Python. So now() may return different results + # XXX across the implementations. + @classmethod + def now(cls, tz=None): + "Construct a datetime from time.time() and optional time zone info." + t = _time.time() + return cls.fromtimestamp(t, tz) + + @classmethod + def utcnow(cls): + "Construct a UTC datetime from time.time()." + t = _time.time() + return cls.utcfromtimestamp(t) + + @classmethod + def combine(cls, date, time): + "Construct a datetime from a given date and a given time." + if not isinstance(date, _date_class): + raise TypeError("date argument must be a date instance") + if not isinstance(time, _time_class): + raise TypeError("time argument must be a time instance") + return cls(date.year, date.month, date.day, + time.hour, time.minute, time.second, time.microsecond, + time.tzinfo) + + def timetuple(self): + "Return local time tuple compatible with time.localtime()." 
+ dst = self.dst() + if dst is None: + dst = -1 + elif dst: + dst = 1 + else: + dst = 0 + return _build_struct_time(self.year, self.month, self.day, + self.hour, self.minute, self.second, + dst) + + def timestamp(self): + "Return POSIX timestamp as float" + if self._tzinfo is None: + return _time.mktime((self.year, self.month, self.day, + self.hour, self.minute, self.second, + -1, -1, -1)) + self.microsecond / 1e6 + else: + return (self - _EPOCH).total_seconds() + + def utctimetuple(self): + "Return UTC time tuple compatible with time.gmtime()." + offset = self.utcoffset() + if offset: + self -= offset + y, m, d = self.year, self.month, self.day + hh, mm, ss = self.hour, self.minute, self.second + return _build_struct_time(y, m, d, hh, mm, ss, 0) + + def date(self): + "Return the date part." + return date(self._year, self._month, self._day) + + def time(self): + "Return the time part, with tzinfo None." + return time(self.hour, self.minute, self.second, self.microsecond) + + def timetz(self): + "Return the time part, with same tzinfo." 
+ return time(self.hour, self.minute, self.second, self.microsecond, + self._tzinfo) + + def replace(self, year=None, month=None, day=None, hour=None, + minute=None, second=None, microsecond=None, tzinfo=True): + """Return a new datetime with new values for the specified fields.""" + if year is None: + year = self.year + if month is None: + month = self.month + if day is None: + day = self.day + if hour is None: + hour = self.hour + if minute is None: + minute = self.minute + if second is None: + second = self.second + if microsecond is None: + microsecond = self.microsecond + if tzinfo is True: + tzinfo = self.tzinfo + _check_date_fields(year, month, day) + _check_time_fields(hour, minute, second, microsecond) + _check_tzinfo_arg(tzinfo) + return datetime(year, month, day, hour, minute, second, + microsecond, tzinfo) + + def astimezone(self, tz=None): + if tz is None: + if self.tzinfo is None: + raise ValueError("astimezone() requires an aware datetime") + ts = (self - _EPOCH) // timedelta(seconds=1) + localtm = _time.localtime(ts) + local = datetime(*localtm[:6]) + try: + # Extract TZ data if available + gmtoff = localtm.tm_gmtoff + zone = localtm.tm_zone + except AttributeError: + # Compute UTC offset and compare with the value implied + # by tm_isdst. If the values match, use the zone name + # implied by tm_isdst. + delta = local - datetime(*_time.gmtime(ts)[:6]) + dst = _time.daylight and localtm.tm_isdst > 0 + gmtoff = -(_time.altzone if dst else _time.timezone) + if delta == timedelta(seconds=gmtoff): + tz = timezone(delta, _time.tzname[dst]) + else: + tz = timezone(delta) + else: + tz = timezone(timedelta(seconds=gmtoff), zone) + + elif not isinstance(tz, tzinfo): + raise TypeError("tz argument must be an instance of tzinfo") + + mytz = self.tzinfo + if mytz is None: + raise ValueError("astimezone() requires an aware datetime") + + if tz is mytz: + return self + + # Convert self to UTC, and attach the new time zone object. 
+ myoffset = self.utcoffset() + if myoffset is None: + raise ValueError("astimezone() requires an aware datetime") + utc = (self - myoffset).replace(tzinfo=tz) + + # Convert from UTC to tz's local time. + return tz.fromutc(utc) + + # Ways to produce a string. + + def ctime(self): + "Return ctime() style string." + weekday = self.toordinal() % 7 or 7 + return "%s %s %2d %02d:%02d:%02d %04d" % ( + _DAYNAMES[weekday], + _MONTHNAMES[self._month], + self._day, + self._hour, self._minute, self._second, + self._year) + + def isoformat(self, sep='T'): + """Return the time formatted according to ISO. + + This is 'YYYY-MM-DD HH:MM:SS.mmmmmm', or 'YYYY-MM-DD HH:MM:SS' if + self.microsecond == 0. + + If self.tzinfo is not None, the UTC offset is also attached, giving + 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM' or 'YYYY-MM-DD HH:MM:SS+HH:MM'. + + Optional argument sep specifies the separator between date and + time, default 'T'. + """ + s = ("%04d-%02d-%02d%c" % (self._year, self._month, self._day, + sep) + + _format_time(self._hour, self._minute, self._second, + self._microsecond)) + off = self.utcoffset() + if off is not None: + if off.days < 0: + sign = "-" + off = -off + else: + sign = "+" + hh, mm = divmod(off, timedelta(hours=1)) + assert not mm % timedelta(minutes=1), "whole minute" + mm //= timedelta(minutes=1) + s += "%s%02d:%02d" % (sign, hh, mm) + return s + + def __repr__(self): + """Convert to formal string, for repr().""" + L = [self._year, self._month, self._day, # These are never zero + self._hour, self._minute, self._second, self._microsecond] + if L[-1] == 0: + del L[-1] + if L[-1] == 0: + del L[-1] + s = ", ".join(map(str, L)) + s = "%s(%s)" % ('datetime.' + self.__class__.__name__, s) + if self._tzinfo is not None: + assert s[-1:] == ")" + s = s[:-1] + ", tzinfo=%r" % self._tzinfo + ")" + return s + + def __str__(self): + "Convert to string, for str()." 
+ return self.isoformat(sep=' ') + + @classmethod + def strptime(cls, date_string, format): + 'string, format -> new datetime parsed from a string (like time.strptime()).' + import _strptime + return _strptime._strptime_datetime(cls, date_string, format) + + def utcoffset(self): + """Return the timezone offset in minutes east of UTC (negative west of + UTC).""" + if self._tzinfo is None: + return None + offset = self._tzinfo.utcoffset(self) + _check_utc_offset("utcoffset", offset) + return offset + + def tzname(self): + """Return the timezone name. + + Note that the name is 100% informational -- there's no requirement that + it mean anything in particular. For example, "GMT", "UTC", "-500", + "-5:00", "EDT", "US/Eastern", "America/New York" are all valid replies. + """ + name = _call_tzinfo_method(self._tzinfo, "tzname", self) + _check_tzname(name) + return name + + def dst(self): + """Return 0 if DST is not in effect, or the DST offset (in minutes + eastward) if DST is in effect. + + This is purely informational; the DST offset has already been added to + the UTC offset returned by utcoffset() if applicable, so there's no + need to consult dst() unless you're interested in displaying the DST + info. + """ + if self._tzinfo is None: + return None + offset = self._tzinfo.dst(self) + _check_utc_offset("dst", offset) + return offset + + # Comparisons of datetime objects with other. 
+ + def __eq__(self, other): + if isinstance(other, datetime): + return self._cmp(other, allow_mixed=True) == 0 + elif not isinstance(other, date): + return NotImplemented + else: + return False + + def __ne__(self, other): + if isinstance(other, datetime): + return self._cmp(other, allow_mixed=True) != 0 + elif not isinstance(other, date): + return NotImplemented + else: + return True + + def __le__(self, other): + if isinstance(other, datetime): + return self._cmp(other) <= 0 + elif not isinstance(other, date): + return NotImplemented + else: + _cmperror(self, other) + + def __lt__(self, other): + if isinstance(other, datetime): + return self._cmp(other) < 0 + elif not isinstance(other, date): + return NotImplemented + else: + _cmperror(self, other) + + def __ge__(self, other): + if isinstance(other, datetime): + return self._cmp(other) >= 0 + elif not isinstance(other, date): + return NotImplemented + else: + _cmperror(self, other) + + def __gt__(self, other): + if isinstance(other, datetime): + return self._cmp(other) > 0 + elif not isinstance(other, date): + return NotImplemented + else: + _cmperror(self, other) + + def _cmp(self, other, allow_mixed=False): + assert isinstance(other, datetime) + mytz = self._tzinfo + ottz = other._tzinfo + myoff = otoff = None + + if mytz is ottz: + base_compare = True + else: + myoff = self.utcoffset() + otoff = other.utcoffset() + base_compare = myoff == otoff + + if base_compare: + return _cmp((self._year, self._month, self._day, + self._hour, self._minute, self._second, + self._microsecond), + (other._year, other._month, other._day, + other._hour, other._minute, other._second, + other._microsecond)) + if myoff is None or otoff is None: + if allow_mixed: + return 2 # arbitrary non-zero value + else: + raise TypeError("cannot compare naive and aware datetimes") + # XXX What follows could be done more efficiently... 
+ diff = self - other # this will take offsets into account + if diff.days < 0: + return -1 + return diff and 1 or 0 + + def __add__(self, other): + "Add a datetime and a timedelta." + if not isinstance(other, timedelta): + return NotImplemented + delta = timedelta(self.toordinal(), + hours=self._hour, + minutes=self._minute, + seconds=self._second, + microseconds=self._microsecond) + delta += other + hour, rem = divmod(delta.seconds, 3600) + minute, second = divmod(rem, 60) + if 0 < delta.days <= _MAXORDINAL: + return datetime.combine(date.fromordinal(delta.days), + time(hour, minute, second, + delta.microseconds, + tzinfo=self._tzinfo)) + raise OverflowError("result out of range") + + __radd__ = __add__ + + def __sub__(self, other): + "Subtract two datetimes, or a datetime and a timedelta." + if not isinstance(other, datetime): + if isinstance(other, timedelta): + return self + -other + return NotImplemented + + days1 = self.toordinal() + days2 = other.toordinal() + secs1 = self._second + self._minute * 60 + self._hour * 3600 + secs2 = other._second + other._minute * 60 + other._hour * 3600 + base = timedelta(days1 - days2, + secs1 - secs2, + self._microsecond - other._microsecond) + if self._tzinfo is other._tzinfo: + return base + myoff = self.utcoffset() + otoff = other.utcoffset() + if myoff == otoff: + return base + if myoff is None or otoff is None: + raise TypeError("cannot mix naive and timezone-aware time") + return base + otoff - myoff + + def __hash__(self): + tzoff = self.utcoffset() + if tzoff is None: + return hash(self._getstate()[0]) + days = _ymd2ord(self.year, self.month, self.day) + seconds = self.hour * 3600 + self.minute * 60 + self.second + return hash(timedelta(days, seconds, self.microsecond) - tzoff) + + # Pickle support. 
+ + def _getstate(self): + yhi, ylo = divmod(self._year, 256) + us2, us3 = divmod(self._microsecond, 256) + us1, us2 = divmod(us2, 256) + basestate = bytes([yhi, ylo, self._month, self._day, + self._hour, self._minute, self._second, + us1, us2, us3]) + if self._tzinfo is None: + return (basestate,) + else: + return (basestate, self._tzinfo) + + def __setstate(self, string, tzinfo): + (yhi, ylo, self._month, self._day, self._hour, + self._minute, self._second, us1, us2, us3) = string + self._year = yhi * 256 + ylo + self._microsecond = (((us1 << 8) | us2) << 8) | us3 + if tzinfo is None or isinstance(tzinfo, _tzinfo_class): + self._tzinfo = tzinfo + else: + raise TypeError("bad tzinfo state arg %r" % tzinfo) + + def __reduce__(self): + return (self.__class__, self._getstate()) + + +datetime.min = datetime(1, 1, 1) +datetime.max = datetime(9999, 12, 31, 23, 59, 59, 999999) +datetime.resolution = timedelta(microseconds=1) + + +def _isoweek1monday(year): + # Helper to calculate the day number of the Monday starting week 1 + # XXX This could be done more efficiently + THURSDAY = 3 + firstday = _ymd2ord(year, 1, 1) + firstweekday = (firstday + 6) % 7 # See weekday() above + week1monday = firstday - firstweekday + if firstweekday > THURSDAY: + week1monday += 7 + return week1monday + +class timezone(tzinfo): + __slots__ = '_offset', '_name' + + # Sentinel value to disallow None + _Omitted = object() + def __new__(cls, offset, name=_Omitted): + if not isinstance(offset, timedelta): + raise TypeError("offset must be a timedelta") + if name is cls._Omitted: + if not offset: + return cls.utc + name = None + elif not isinstance(name, str): + ### + # For Python-Future: + if PY2 and isinstance(name, native_str): + name = name.decode() + else: + raise TypeError("name must be a string") + ### + if not cls._minoffset <= offset <= cls._maxoffset: + raise ValueError("offset must be a timedelta" + " strictly between -timedelta(hours=24) and" + " timedelta(hours=24).") + if 
(offset.microseconds != 0 or + offset.seconds % 60 != 0): + raise ValueError("offset must be a timedelta" + " representing a whole number of minutes") + return cls._create(offset, name) + + @classmethod + def _create(cls, offset, name=None): + self = tzinfo.__new__(cls) + self._offset = offset + self._name = name + return self + + def __getinitargs__(self): + """pickle support""" + if self._name is None: + return (self._offset,) + return (self._offset, self._name) + + def __eq__(self, other): + if type(other) != timezone: + return False + return self._offset == other._offset + + def __hash__(self): + return hash(self._offset) + + def __repr__(self): + """Convert to formal string, for repr(). + + >>> tz = timezone.utc + >>> repr(tz) + 'datetime.timezone.utc' + >>> tz = timezone(timedelta(hours=-5), 'EST') + >>> repr(tz) + "datetime.timezone(datetime.timedelta(-1, 68400), 'EST')" + """ + if self is self.utc: + return 'datetime.timezone.utc' + if self._name is None: + return "%s(%r)" % ('datetime.' + self.__class__.__name__, + self._offset) + return "%s(%r, %r)" % ('datetime.' 
+ self.__class__.__name__, + self._offset, self._name) + + def __str__(self): + return self.tzname(None) + + def utcoffset(self, dt): + if isinstance(dt, datetime) or dt is None: + return self._offset + raise TypeError("utcoffset() argument must be a datetime instance" + " or None") + + def tzname(self, dt): + if isinstance(dt, datetime) or dt is None: + if self._name is None: + return self._name_from_offset(self._offset) + return self._name + raise TypeError("tzname() argument must be a datetime instance" + " or None") + + def dst(self, dt): + if isinstance(dt, datetime) or dt is None: + return None + raise TypeError("dst() argument must be a datetime instance" + " or None") + + def fromutc(self, dt): + if isinstance(dt, datetime): + if dt.tzinfo is not self: + raise ValueError("fromutc: dt.tzinfo " + "is not self") + return dt + self._offset + raise TypeError("fromutc() argument must be a datetime instance" + " or None") + + _maxoffset = timedelta(hours=23, minutes=59) + _minoffset = -_maxoffset + + @staticmethod + def _name_from_offset(delta): + if delta < timedelta(0): + sign = '-' + delta = -delta + else: + sign = '+' + hours, rest = divmod(delta, timedelta(hours=1)) + minutes = rest // timedelta(minutes=1) + return 'UTC{}{:02d}:{:02d}'.format(sign, hours, minutes) + +timezone.utc = timezone._create(timedelta(0)) +timezone.min = timezone._create(timezone._minoffset) +timezone.max = timezone._create(timezone._maxoffset) +_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc) +""" +Some time zone algebra. For a datetime x, let + x.n = x stripped of its timezone -- its naive time. + x.o = x.utcoffset(), and assuming that doesn't raise an exception or + return None + x.d = x.dst(), and assuming that doesn't raise an exception or + return None + x.s = x's standard offset, x.o - x.d + +Now some derived rules, where k is a duration (timedelta). + +1. x.o = x.s + x.d + This follows from the definition of x.s. + +2. If x and y have the same tzinfo member, x.s = y.s. 
+ This is actually a requirement, an assumption we need to make about + sane tzinfo classes. + +3. The naive UTC time corresponding to x is x.n - x.o. + This is again a requirement for a sane tzinfo class. + +4. (x+k).s = x.s + This follows from #2, and that datimetimetz+timedelta preserves tzinfo. + +5. (x+k).n = x.n + k + Again follows from how arithmetic is defined. + +Now we can explain tz.fromutc(x). Let's assume it's an interesting case +(meaning that the various tzinfo methods exist, and don't blow up or return +None when called). + +The function wants to return a datetime y with timezone tz, equivalent to x. +x is already in UTC. + +By #3, we want + + y.n - y.o = x.n [1] + +The algorithm starts by attaching tz to x.n, and calling that y. So +x.n = y.n at the start. Then it wants to add a duration k to y, so that [1] +becomes true; in effect, we want to solve [2] for k: + + (y+k).n - (y+k).o = x.n [2] + +By #1, this is the same as + + (y+k).n - ((y+k).s + (y+k).d) = x.n [3] + +By #5, (y+k).n = y.n + k, which equals x.n + k because x.n=y.n at the start. +Substituting that into [3], + + x.n + k - (y+k).s - (y+k).d = x.n; the x.n terms cancel, leaving + k - (y+k).s - (y+k).d = 0; rearranging, + k = (y+k).s - (y+k).d; by #4, (y+k).s == y.s, so + k = y.s - (y+k).d + +On the RHS, (y+k).d can't be computed directly, but y.s can be, and we +approximate k by ignoring the (y+k).d term at first. Note that k can't be +very large, since all offset-returning methods return a duration of magnitude +less than 24 hours. For that reason, if y is firmly in std time, (y+k).d must +be 0, so ignoring it has no consequence then. + +In any case, the new value is + + z = y + y.s [4] + +It's helpful to step back at look at [4] from a higher level: it's simply +mapping from UTC to tz's standard time. + +At this point, if + + z.n - z.o = x.n [5] + +we have an equivalent time, and are almost done. The insecurity here is +at the start of daylight time. 
Picture US Eastern for concreteness. The wall +time jumps from 1:59 to 3:00, and wall hours of the form 2:MM don't make good +sense then. The docs ask that an Eastern tzinfo class consider such a time to +be EDT (because it's "after 2"), which is a redundant spelling of 1:MM EST +on the day DST starts. We want to return the 1:MM EST spelling because that's +the only spelling that makes sense on the local wall clock. + +In fact, if [5] holds at this point, we do have the standard-time spelling, +but that takes a bit of proof. We first prove a stronger result. What's the +difference between the LHS and RHS of [5]? Let + + diff = x.n - (z.n - z.o) [6] + +Now + z.n = by [4] + (y + y.s).n = by #5 + y.n + y.s = since y.n = x.n + x.n + y.s = since z and y are have the same tzinfo member, + y.s = z.s by #2 + x.n + z.s + +Plugging that back into [6] gives + + diff = + x.n - ((x.n + z.s) - z.o) = expanding + x.n - x.n - z.s + z.o = cancelling + - z.s + z.o = by #2 + z.d + +So diff = z.d. + +If [5] is true now, diff = 0, so z.d = 0 too, and we have the standard-time +spelling we wanted in the endcase described above. We're done. Contrarily, +if z.d = 0, then we have a UTC equivalent, and are also done. + +If [5] is not true now, diff = z.d != 0, and z.d is the offset we need to +add to z (in effect, z is in tz's standard time, and we need to shift the +local clock into tz's daylight time). + +Let + + z' = z + z.d = z + diff [7] + +and we can again ask whether + + z'.n - z'.o = x.n [8] + +If so, we're done. If not, the tzinfo class is insane, according to the +assumptions we've made. This also requires a bit of proof. 
As before, let's +compute the difference between the LHS and RHS of [8] (and skipping some of +the justifications for the kinds of substitutions we've done several times +already): + + diff' = x.n - (z'.n - z'.o) = replacing z'.n via [7] + x.n - (z.n + diff - z'.o) = replacing diff via [6] + x.n - (z.n + x.n - (z.n - z.o) - z'.o) = + x.n - z.n - x.n + z.n - z.o + z'.o = cancel x.n + - z.n + z.n - z.o + z'.o = cancel z.n + - z.o + z'.o = #1 twice + -z.s - z.d + z'.s + z'.d = z and z' have same tzinfo + z'.d - z.d + +So z' is UTC-equivalent to x iff z'.d = z.d at this point. If they are equal, +we've found the UTC-equivalent so are done. In fact, we stop with [7] and +return z', not bothering to compute z'.d. + +How could z.d and z'd differ? z' = z + z.d [7], so merely moving z' by +a dst() offset, and starting *from* a time already in DST (we know z.d != 0), +would have to change the result dst() returns: we start in DST, and moving +a little further into it takes us out of DST. + +There isn't a sane case where this can happen. The closest it gets is at +the end of DST, where there's an hour in UTC with no spelling in a hybrid +tzinfo class. In US Eastern, that's 5:MM UTC = 0:MM EST = 1:MM EDT. During +that hour, on an Eastern clock 1:MM is taken as being in standard time (6:MM +UTC) because the docs insist on that, but 0:MM is taken as being in daylight +time (4:MM UTC). There is no local time mapping to 5:MM UTC. The local +clock jumps from 1:59 back to 1:00 again, and repeats the 1:MM hour in +standard time. Since that's what the local clock *does*, we want to map both +UTC hours 5:MM and 6:MM to 1:MM Eastern. The result is ambiguous +in local time, but so it goes -- it's the way the local clock works. + +When x = 5:MM UTC is the input to this algorithm, x.o=0, y.o=-5 and y.d=0, +so z=0:MM. z.d=60 (minutes) then, so [5] doesn't hold and we keep going. 
+z' = z + z.d = 1:MM then, and z'.d=0, and z'.d - z.d = -60 != 0 so [8] +(correctly) concludes that z' is not UTC-equivalent to x. + +Because we know z.d said z was in daylight time (else [5] would have held and +we would have stopped then), and we know z.d != z'.d (else [8] would have held +and we have stopped then), and there are only 2 possible values dst() can +return in Eastern, it follows that z'.d must be 0 (which it is in the example, +but the reasoning doesn't depend on the example -- it depends on there being +two possible dst() outcomes, one zero and the other non-zero). Therefore +z' must be in standard time, and is the spelling we want in this case. + +Note again that z' is not UTC-equivalent as far as the hybrid tzinfo class is +concerned (because it takes z' as being in standard time rather than the +daylight time we intend here), but returning it gives the real-life "local +clock repeats an hour" behavior when mapping the "unspellable" UTC hour into +tz. + +When the input is 6:MM, z=1:MM and z.d=0, and we stop at once, again with +the 1:MM standard time spelling we want. + +So how can this break? One of the assumptions must be violated. Two +possibilities: + +1) [2] effectively says that y.s is invariant across all y belong to a given + time zone. This isn't true if, for political reasons or continental drift, + a region decides to change its base offset from UTC. + +2) There may be versions of "double daylight" time where the tail end of + the analysis gives up a step too early. I haven't thought about that + enough to say. + +In any case, it's clear that the default fromutc() is strong enough to handle +"almost all" time zones: so long as the standard offset is invariant, it +doesn't matter if daylight time transition points change from year to year, or +if daylight time is skipped in some years; it doesn't matter how large or +small dst() may get within its bounds; and it doesn't even matter if some +perverse time zone returns a negative dst()). 
So a breaking case must be +pretty bizarre, and a tzinfo subclass can override fromutc() if it is. +""" +try: + from _datetime import * +except ImportError: + pass +else: + # Clean up unused names + del (_DAYNAMES, _DAYS_BEFORE_MONTH, _DAYS_IN_MONTH, + _DI100Y, _DI400Y, _DI4Y, _MAXORDINAL, _MONTHNAMES, + _build_struct_time, _call_tzinfo_method, _check_date_fields, + _check_time_fields, _check_tzinfo_arg, _check_tzname, + _check_utc_offset, _cmp, _cmperror, _date_class, _days_before_month, + _days_before_year, _days_in_month, _format_time, _is_leap, + _isoweek1monday, _math, _ord2ymd, _time, _time_class, _tzinfo_class, + _wrap_strftime, _ymd2ord) + # XXX Since import * above excludes names that start with _, + # docstring does not get overwritten. In the future, it may be + # appropriate to maintain a single module level docstring and + # remove the following line. + from _datetime import __doc__ diff --git a/src/future/backports/email/__init__.py b/src/future/backports/email/__init__.py new file mode 100644 index 00000000..f9523bc1 --- /dev/null +++ b/src/future/backports/email/__init__.py @@ -0,0 +1,78 @@ +# Copyright (C) 2001-2007 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +""" +Backport of the Python 3.3 email package for Python-Future. + +A package for parsing, handling, and generating email messages. +""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import + +# Install the surrogate escape handler here because this is used by many +# modules in the email package. +from future.utils import surrogateescape +surrogateescape.register_surrogateescape() +# (Should this be done globally by ``future``?) 
+ + +__version__ = '5.1.0' + +__all__ = [ + 'base64mime', + 'charset', + 'encoders', + 'errors', + 'feedparser', + 'generator', + 'header', + 'iterators', + 'message', + 'message_from_file', + 'message_from_binary_file', + 'message_from_string', + 'message_from_bytes', + 'mime', + 'parser', + 'quoprimime', + 'utils', + ] + + + +# Some convenience routines. Don't import Parser and Message as side-effects +# of importing email since those cascadingly import most of the rest of the +# email package. +def message_from_string(s, *args, **kws): + """Parse a string into a Message object model. + + Optional _class and strict are passed to the Parser constructor. + """ + from future.backports.email.parser import Parser + return Parser(*args, **kws).parsestr(s) + +def message_from_bytes(s, *args, **kws): + """Parse a bytes string into a Message object model. + + Optional _class and strict are passed to the Parser constructor. + """ + from future.backports.email.parser import BytesParser + return BytesParser(*args, **kws).parsebytes(s) + +def message_from_file(fp, *args, **kws): + """Read a file and parse its contents into a Message object model. + + Optional _class and strict are passed to the Parser constructor. + """ + from future.backports.email.parser import Parser + return Parser(*args, **kws).parse(fp) + +def message_from_binary_file(fp, *args, **kws): + """Read a binary file and parse its contents into a Message object model. + + Optional _class and strict are passed to the Parser constructor. + """ + from future.backports.email.parser import BytesParser + return BytesParser(*args, **kws).parse(fp) diff --git a/src/future/backports/email/_encoded_words.py b/src/future/backports/email/_encoded_words.py new file mode 100644 index 00000000..7c4a5291 --- /dev/null +++ b/src/future/backports/email/_encoded_words.py @@ -0,0 +1,232 @@ +""" Routines for manipulating RFC2047 encoded words. 
+ +This is currently a package-private API, but will be considered for promotion +to a public API if there is demand. + +""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from future.builtins import bytes +from future.builtins import chr +from future.builtins import int +from future.builtins import str + +# An ecoded word looks like this: +# +# =?charset[*lang]?cte?encoded_string?= +# +# for more information about charset see the charset module. Here it is one +# of the preferred MIME charset names (hopefully; you never know when parsing). +# cte (Content Transfer Encoding) is either 'q' or 'b' (ignoring case). In +# theory other letters could be used for other encodings, but in practice this +# (almost?) never happens. There could be a public API for adding entries +# to the CTE tables, but YAGNI for now. 'q' is Quoted Printable, 'b' is +# Base64. The meaning of encoded_string should be obvious. 'lang' is optional +# as indicated by the brackets (they are not part of the syntax) but is almost +# never encountered in practice. +# +# The general interface for a CTE decoder is that it takes the encoded_string +# as its argument, and returns a tuple (cte_decoded_string, defects). The +# cte_decoded_string is the original binary that was encoded using the +# specified cte. 'defects' is a list of MessageDefect instances indicating any +# problems encountered during conversion. 'charset' and 'lang' are the +# corresponding strings extracted from the EW, case preserved. +# +# The general interface for a CTE encoder is that it takes a binary sequence +# as input and returns the cte_encoded_string, which is an ascii-only string. +# +# Each decoder must also supply a length function that takes the binary +# sequence as its argument and returns the length of the resulting encoded +# string. 
+# +# The main API functions for the module are decode, which calls the decoder +# referenced by the cte specifier, and encode, which adds the appropriate +# RFC 2047 "chrome" to the encoded string, and can optionally automatically +# select the shortest possible encoding. See their docstrings below for +# details. + +import re +import base64 +import binascii +import functools +from string import ascii_letters, digits +from future.backports.email import errors + +__all__ = ['decode_q', + 'encode_q', + 'decode_b', + 'encode_b', + 'len_q', + 'len_b', + 'decode', + 'encode', + ] + +# +# Quoted Printable +# + +# regex based decoder. +_q_byte_subber = functools.partial(re.compile(br'=([a-fA-F0-9]{2})').sub, + lambda m: bytes([int(m.group(1), 16)])) + +def decode_q(encoded): + encoded = bytes(encoded.replace(b'_', b' ')) + return _q_byte_subber(encoded), [] + + +# dict mapping bytes to their encoded form +class _QByteMap(dict): + + safe = bytes(b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii')) + + def __missing__(self, key): + if key in self.safe: + self[key] = chr(key) + else: + self[key] = "={:02X}".format(key) + return self[key] + +_q_byte_map = _QByteMap() + +# In headers spaces are mapped to '_'. +_q_byte_map[ord(' ')] = '_' + +def encode_q(bstring): + return str(''.join(_q_byte_map[x] for x in bytes(bstring))) + +def len_q(bstring): + return sum(len(_q_byte_map[x]) for x in bytes(bstring)) + + +# +# Base64 +# + +def decode_b(encoded): + defects = [] + pad_err = len(encoded) % 4 + if pad_err: + defects.append(errors.InvalidBase64PaddingDefect()) + padded_encoded = encoded + b'==='[:4-pad_err] + else: + padded_encoded = encoded + try: + # The validate kwarg to b64decode is not supported in Py2.x + if not re.match(b'^[A-Za-z0-9+/]*={0,2}$', padded_encoded): + raise binascii.Error('Non-base64 digit found') + return base64.b64decode(padded_encoded), defects + except binascii.Error: + # Since we had correct padding, this must an invalid char error. 
+ defects = [errors.InvalidBase64CharactersDefect()] + # The non-alphabet characters are ignored as far as padding + # goes, but we don't know how many there are. So we'll just + # try various padding lengths until something works. + for i in 0, 1, 2, 3: + try: + return base64.b64decode(encoded+b'='*i), defects + except (binascii.Error, TypeError): # Py2 raises a TypeError + if i==0: + defects.append(errors.InvalidBase64PaddingDefect()) + else: + # This should never happen. + raise AssertionError("unexpected binascii.Error") + +def encode_b(bstring): + return base64.b64encode(bstring).decode('ascii') + +def len_b(bstring): + groups_of_3, leftover = divmod(len(bstring), 3) + # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in. + return groups_of_3 * 4 + (4 if leftover else 0) + + +_cte_decoders = { + 'q': decode_q, + 'b': decode_b, + } + +def decode(ew): + """Decode encoded word and return (string, charset, lang, defects) tuple. + + An RFC 2047/2243 encoded word has the form: + + =?charset*lang?cte?encoded_string?= + + where '*lang' may be omitted but the other parts may not be. + + This function expects exactly such a string (that is, it does not check the + syntax and may raise errors if the string is not well formed), and returns + the encoded_string decoded first from its Content Transfer Encoding and + then from the resulting bytes into unicode using the specified charset. If + the cte-decoded string does not successfully decode using the specified + character set, a defect is added to the defects list and the unknown octets + are replaced by the unicode 'unknown' character \uFDFF. + + The specified charset and language are returned. The default for language, + which is rarely if ever encountered, is the empty string. + + """ + _, charset, cte, cte_string, _ = str(ew).split('?') + charset, _, lang = charset.partition('*') + cte = cte.lower() + # Recover the original bytes and do CTE decoding. 
+ bstring = cte_string.encode('ascii', 'surrogateescape') + bstring, defects = _cte_decoders[cte](bstring) + # Turn the CTE decoded bytes into unicode. + try: + string = bstring.decode(charset) + except UnicodeError: + defects.append(errors.UndecodableBytesDefect("Encoded word " + "contains bytes not decodable using {} charset".format(charset))) + string = bstring.decode(charset, 'surrogateescape') + except LookupError: + string = bstring.decode('ascii', 'surrogateescape') + if charset.lower() != 'unknown-8bit': + defects.append(errors.CharsetError("Unknown charset {} " + "in encoded word; decoded as unknown bytes".format(charset))) + return string, charset, lang, defects + + +_cte_encoders = { + 'q': encode_q, + 'b': encode_b, + } + +_cte_encode_length = { + 'q': len_q, + 'b': len_b, + } + +def encode(string, charset='utf-8', encoding=None, lang=''): + """Encode string using the CTE encoding that produces the shorter result. + + Produces an RFC 2047/2243 encoded word of the form: + + =?charset*lang?cte?encoded_string?= + + where '*lang' is omitted unless the 'lang' parameter is given a value. + Optional argument charset (defaults to utf-8) specifies the charset to use + to encode the string to binary before CTE encoding it. Optional argument + 'encoding' is the cte specifier for the encoding that should be used ('q' + or 'b'); if it is None (the default) the encoding which produces the + shortest encoded sequence is used, except that 'q' is preferred if it is up + to five characters longer. Optional argument 'lang' (default '') gives the + RFC 2243 language string to specify in the encoded word. + + """ + string = str(string) + if charset == 'unknown-8bit': + bstring = string.encode('ascii', 'surrogateescape') + else: + bstring = string.encode(charset) + if encoding is None: + qlen = _cte_encode_length['q'](bstring) + blen = _cte_encode_length['b'](bstring) + # Bias toward q. 5 is arbitrary. 
+ encoding = 'q' if qlen - blen < 5 else 'b' + encoded = _cte_encoders[encoding](bstring) + if lang: + lang = '*' + lang + return "=?{0}{1}?{2}?{3}?=".format(charset, lang, encoding, encoded) diff --git a/src/future/backports/email/_header_value_parser.py b/src/future/backports/email/_header_value_parser.py new file mode 100644 index 00000000..59b1b318 --- /dev/null +++ b/src/future/backports/email/_header_value_parser.py @@ -0,0 +1,2965 @@ +"""Header value parser implementing various email-related RFC parsing rules. + +The parsing methods defined in this module implement various email related +parsing rules. Principal among them is RFC 5322, which is the followon +to RFC 2822 and primarily a clarification of the former. It also implements +RFC 2047 encoded word decoding. + +RFC 5322 goes to considerable trouble to maintain backward compatibility with +RFC 822 in the parse phase, while cleaning up the structure on the generation +phase. This parser supports correct RFC 5322 generation by tagging white space +as folding white space only when folding is allowed in the non-obsolete rule +sets. Actually, the parser is even more generous when accepting input than RFC +5322 mandates, following the spirit of Postel's Law, which RFC 5322 encourages. +Where possible deviations from the standard are annotated on the 'defects' +attribute of tokens that deviate. + +The general structure of the parser follows RFC 5322, and uses its terminology +where there is a direct correspondence. Where the implementation requires a +somewhat different structure than that used by the formal grammar, new terms +that mimic the closest existing terms are used. Thus, it really helps to have +a copy of RFC 5322 handy when studying this code. + +Input to the parser is a string that has already been unfolded according to +RFC 5322 rules. 
According to the RFC this unfolding is the very first step, and +this parser leaves the unfolding step to a higher level message parser, which +will have already detected the line breaks that need unfolding while +determining the beginning and end of each header. + +The output of the parser is a TokenList object, which is a list subclass. A +TokenList is a recursive data structure. The terminal nodes of the structure +are Terminal objects, which are subclasses of str. These do not correspond +directly to terminal objects in the formal grammar, but are instead more +practical higher level combinations of true terminals. + +All TokenList and Terminal objects have a 'value' attribute, which produces the +semantically meaningful value of that part of the parse subtree. The value of +all whitespace tokens (no matter how many sub-tokens they may contain) is a +single space, as per the RFC rules. This includes 'CFWS', which is herein +included in the general class of whitespace tokens. There is one exception to +the rule that whitespace tokens are collapsed into single spaces in values: in +the value of a 'bare-quoted-string' (a quoted-string with no leading or +trailing whitespace), any whitespace that appeared between the quotation marks +is preserved in the returned value. Note that in all Terminal strings quoted +pairs are turned into their unquoted values. + +All TokenList and Terminal objects also have a string value, which attempts to +be a "canonical" representation of the RFC-compliant form of the substring that +produced the parsed subtree, including minimal use of quoted pair quoting. +Whitespace runs are not collapsed. + +Comment tokens also have a 'content' attribute providing the string found +between the parens (including any nested comments) with whitespace preserved. + +All TokenList and Terminal objects have a 'defects' attribute which is a +possibly empty list all of the defects found while creating the token. 
Defects +may appear on any token in the tree, and a composite list of all defects in the +subtree is available through the 'all_defects' attribute of any node. (For +Terminal notes x.defects == x.all_defects.) + +Each object in a parse tree is called a 'token', and each has a 'token_type' +attribute that gives the name from the RFC 5322 grammar that it represents. +Not all RFC 5322 nodes are produced, and there is one non-RFC 5322 node that +may be produced: 'ptext'. A 'ptext' is a string of printable ascii characters. +It is returned in place of lists of (ctext/quoted-pair) and +(qtext/quoted-pair). + +XXX: provide complete list of token types. +""" +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from future.builtins import int, range, str, super, list + +import re +from collections import namedtuple, OrderedDict + +from future.backports.urllib.parse import (unquote, unquote_to_bytes) +from future.backports.email import _encoded_words as _ew +from future.backports.email import errors +from future.backports.email import utils + +# +# Useful constants and functions +# + +WSP = set(' \t') +CFWS_LEADER = WSP | set('(') +SPECIALS = set(r'()<>@,:;.\"[]') +ATOM_ENDS = SPECIALS | WSP +DOT_ATOM_ENDS = ATOM_ENDS - set('.') +# '.', '"', and '(' do not end phrases in order to support obs-phrase +PHRASE_ENDS = SPECIALS - set('."(') +TSPECIALS = (SPECIALS | set('/?=')) - set('.') +TOKEN_ENDS = TSPECIALS | WSP +ASPECIALS = TSPECIALS | set("*'%") +ATTRIBUTE_ENDS = ASPECIALS | WSP +EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%') + +def quote_string(value): + return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"' + +# +# Accumulator for header folding +# + +class _Folded(object): + + def __init__(self, maxlen, policy): + self.maxlen = maxlen + self.policy = policy + self.lastlen = 0 + self.stickyspace = None + self.firstline = True + self.done = [] + 
self.current = list() # uses l.clear() + + def newline(self): + self.done.extend(self.current) + self.done.append(self.policy.linesep) + self.current.clear() + self.lastlen = 0 + + def finalize(self): + if self.current: + self.newline() + + def __str__(self): + return ''.join(self.done) + + def append(self, stoken): + self.current.append(stoken) + + def append_if_fits(self, token, stoken=None): + if stoken is None: + stoken = str(token) + l = len(stoken) + if self.stickyspace is not None: + stickyspace_len = len(self.stickyspace) + if self.lastlen + stickyspace_len + l <= self.maxlen: + self.current.append(self.stickyspace) + self.lastlen += stickyspace_len + self.current.append(stoken) + self.lastlen += l + self.stickyspace = None + self.firstline = False + return True + if token.has_fws: + ws = token.pop_leading_fws() + if ws is not None: + self.stickyspace += str(ws) + stickyspace_len += len(ws) + token._fold(self) + return True + if stickyspace_len and l + 1 <= self.maxlen: + margin = self.maxlen - l + if 0 < margin < stickyspace_len: + trim = stickyspace_len - margin + self.current.append(self.stickyspace[:trim]) + self.stickyspace = self.stickyspace[trim:] + stickyspace_len = trim + self.newline() + self.current.append(self.stickyspace) + self.current.append(stoken) + self.lastlen = l + stickyspace_len + self.stickyspace = None + self.firstline = False + return True + if not self.firstline: + self.newline() + self.current.append(self.stickyspace) + self.current.append(stoken) + self.stickyspace = None + self.firstline = False + return True + if self.lastlen + l <= self.maxlen: + self.current.append(stoken) + self.lastlen += l + return True + if l < self.maxlen: + self.newline() + self.current.append(stoken) + self.lastlen = l + return True + return False + +# +# TokenList and its subclasses +# + +class TokenList(list): + + token_type = None + + def __init__(self, *args, **kw): + super(TokenList, self).__init__(*args, **kw) + self.defects = [] + + def 
__str__(self): + return ''.join(str(x) for x in self) + + def __repr__(self): + return '{}({})'.format(self.__class__.__name__, + super(TokenList, self).__repr__()) + + @property + def value(self): + return ''.join(x.value for x in self if x.value) + + @property + def all_defects(self): + return sum((x.all_defects for x in self), self.defects) + + # + # Folding API + # + # parts(): + # + # return a list of objects that constitute the "higher level syntactic + # objects" specified by the RFC as the best places to fold a header line. + # The returned objects must include leading folding white space, even if + # this means mutating the underlying parse tree of the object. Each object + # is only responsible for returning *its* parts, and should not drill down + # to any lower level except as required to meet the leading folding white + # space constraint. + # + # _fold(folded): + # + # folded: the result accumulator. This is an instance of _Folded. + # (XXX: I haven't finished factoring this out yet, the folding code + # pretty much uses this as a state object.) When the folded.current + # contains as much text as will fit, the _fold method should call + # folded.newline. + # folded.lastlen: the current length of the test stored in folded.current. + # folded.maxlen: The maximum number of characters that may appear on a + # folded line. Differs from the policy setting in that "no limit" is + # represented by +inf, which means it can be used in the trivially + # logical fashion in comparisons. + # + # Currently no subclasses implement parts, and I think this will remain + # true. A subclass only needs to implement _fold when the generic version + # isn't sufficient. _fold will need to be implemented primarily when it is + # possible for encoded words to appear in the specialized token-list, since + # there is no generic algorithm that can know where exactly the encoded + # words are allowed. 
A _fold implementation is responsible for filling + # lines in the same general way that the top level _fold does. It may, and + # should, call the _fold method of sub-objects in a similar fashion to that + # of the top level _fold. + # + # XXX: I'm hoping it will be possible to factor the existing code further + # to reduce redundancy and make the logic clearer. + + @property + def parts(self): + klass = self.__class__ + this = list() + for token in self: + if token.startswith_fws(): + if this: + yield this[0] if len(this)==1 else klass(this) + this.clear() + end_ws = token.pop_trailing_ws() + this.append(token) + if end_ws: + yield klass(this) + this = [end_ws] + if this: + yield this[0] if len(this)==1 else klass(this) + + def startswith_fws(self): + return self[0].startswith_fws() + + def pop_leading_fws(self): + if self[0].token_type == 'fws': + return self.pop(0) + return self[0].pop_leading_fws() + + def pop_trailing_ws(self): + if self[-1].token_type == 'cfws': + return self.pop(-1) + return self[-1].pop_trailing_ws() + + @property + def has_fws(self): + for part in self: + if part.has_fws: + return True + return False + + def has_leading_comment(self): + return self[0].has_leading_comment() + + @property + def comments(self): + comments = [] + for token in self: + comments.extend(token.comments) + return comments + + def fold(self, **_3to2kwargs): + # max_line_length 0/None means no limit, ie: infinitely long. + policy = _3to2kwargs['policy']; del _3to2kwargs['policy'] + maxlen = policy.max_line_length or float("+inf") + folded = _Folded(maxlen, policy) + self._fold(folded) + folded.finalize() + return str(folded) + + def as_encoded_word(self, charset): + # This works only for things returned by 'parts', which include + # the leading fws, if any, that should be used. 
+ res = [] + ws = self.pop_leading_fws() + if ws: + res.append(ws) + trailer = self.pop(-1) if self[-1].token_type=='fws' else '' + res.append(_ew.encode(str(self), charset)) + res.append(trailer) + return ''.join(res) + + def cte_encode(self, charset, policy): + res = [] + for part in self: + res.append(part.cte_encode(charset, policy)) + return ''.join(res) + + def _fold(self, folded): + for part in self.parts: + tstr = str(part) + tlen = len(tstr) + try: + str(part).encode('us-ascii') + except UnicodeEncodeError: + if any(isinstance(x, errors.UndecodableBytesDefect) + for x in part.all_defects): + charset = 'unknown-8bit' + else: + # XXX: this should be a policy setting + charset = 'utf-8' + tstr = part.cte_encode(charset, folded.policy) + tlen = len(tstr) + if folded.append_if_fits(part, tstr): + continue + # Peel off the leading whitespace if any and make it sticky, to + # avoid infinite recursion. + ws = part.pop_leading_fws() + if ws is not None: + # Peel off the leading whitespace and make it sticky, to + # avoid infinite recursion. + folded.stickyspace = str(part.pop(0)) + if folded.append_if_fits(part): + continue + if part.has_fws: + part._fold(folded) + continue + # There are no fold points in this one; it is too long for a single + # line and can't be split...we just have to put it on its own line. + folded.append(tstr) + folded.newline() + + def pprint(self, indent=''): + print('\n'.join(self._pp(indent=''))) + + def ppstr(self, indent=''): + return '\n'.join(self._pp(indent='')) + + def _pp(self, indent=''): + yield '{}{}/{}('.format( + indent, + self.__class__.__name__, + self.token_type) + for token in self: + if not hasattr(token, '_pp'): + yield (indent + ' !! 
invalid element in token ' + 'list: {!r}'.format(token)) + else: + for line in token._pp(indent+' '): + yield line + if self.defects: + extra = ' Defects: {}'.format(self.defects) + else: + extra = '' + yield '{}){}'.format(indent, extra) + + +class WhiteSpaceTokenList(TokenList): + + @property + def value(self): + return ' ' + + @property + def comments(self): + return [x.content for x in self if x.token_type=='comment'] + + +class UnstructuredTokenList(TokenList): + + token_type = 'unstructured' + + def _fold(self, folded): + if any(x.token_type=='encoded-word' for x in self): + return self._fold_encoded(folded) + # Here we can have either a pure ASCII string that may or may not + # have surrogateescape encoded bytes, or a unicode string. + last_ew = None + for part in self.parts: + tstr = str(part) + is_ew = False + try: + str(part).encode('us-ascii') + except UnicodeEncodeError: + if any(isinstance(x, errors.UndecodableBytesDefect) + for x in part.all_defects): + charset = 'unknown-8bit' + else: + charset = 'utf-8' + if last_ew is not None: + # We've already done an EW, combine this one with it + # if there's room. + chunk = get_unstructured( + ''.join(folded.current[last_ew:]+[tstr])).as_encoded_word(charset) + oldlastlen = sum(len(x) for x in folded.current[:last_ew]) + schunk = str(chunk) + lchunk = len(schunk) + if oldlastlen + lchunk <= folded.maxlen: + del folded.current[last_ew:] + folded.append(schunk) + folded.lastlen = oldlastlen + lchunk + continue + tstr = part.as_encoded_word(charset) + is_ew = True + if folded.append_if_fits(part, tstr): + if is_ew: + last_ew = len(folded.current) - 1 + continue + if is_ew or last_ew: + # It's too big to fit on the line, but since we've + # got encoded words we can use encoded word folding. + part._fold_as_ew(folded) + continue + # Peel off the leading whitespace if any and make it sticky, to + # avoid infinite recursion. 
+ ws = part.pop_leading_fws() + if ws is not None: + folded.stickyspace = str(ws) + if folded.append_if_fits(part): + continue + if part.has_fws: + part.fold(folded) + continue + # It can't be split...we just have to put it on its own line. + folded.append(tstr) + folded.newline() + last_ew = None + + def cte_encode(self, charset, policy): + res = [] + last_ew = None + for part in self: + spart = str(part) + try: + spart.encode('us-ascii') + res.append(spart) + except UnicodeEncodeError: + if last_ew is None: + res.append(part.cte_encode(charset, policy)) + last_ew = len(res) + else: + tl = get_unstructured(''.join(res[last_ew:] + [spart])) + res.append(tl.as_encoded_word()) + return ''.join(res) + + +class Phrase(TokenList): + + token_type = 'phrase' + + def _fold(self, folded): + # As with Unstructured, we can have pure ASCII with or without + # surrogateescape encoded bytes, or we could have unicode. But this + # case is more complicated, since we have to deal with the various + # sub-token types and how they can be composed in the face of + # unicode-that-needs-CTE-encoding, and the fact that if a token a + # comment that becomes a barrier across which we can't compose encoded + # words. + last_ew = None + for part in self.parts: + tstr = str(part) + tlen = len(tstr) + has_ew = False + try: + str(part).encode('us-ascii') + except UnicodeEncodeError: + if any(isinstance(x, errors.UndecodableBytesDefect) + for x in part.all_defects): + charset = 'unknown-8bit' + else: + charset = 'utf-8' + if last_ew is not None and not part.has_leading_comment(): + # We've already done an EW, let's see if we can combine + # this one with it. The last_ew logic ensures that all we + # have at this point is atoms, no comments or quoted + # strings. So we can treat the text between the last + # encoded word and the content of this token as + # unstructured text, and things will work correctly. 
But + # we have to strip off any trailing comment on this token + # first, and if it is a quoted string we have to pull out + # the content (we're encoding it, so it no longer needs to + # be quoted). + if part[-1].token_type == 'cfws' and part.comments: + remainder = part.pop(-1) + else: + remainder = '' + for i, token in enumerate(part): + if token.token_type == 'bare-quoted-string': + part[i] = UnstructuredTokenList(token[:]) + chunk = get_unstructured( + ''.join(folded.current[last_ew:]+[tstr])).as_encoded_word(charset) + schunk = str(chunk) + lchunk = len(schunk) + if last_ew + lchunk <= folded.maxlen: + del folded.current[last_ew:] + folded.append(schunk) + folded.lastlen = sum(len(x) for x in folded.current) + continue + tstr = part.as_encoded_word(charset) + tlen = len(tstr) + has_ew = True + if folded.append_if_fits(part, tstr): + if has_ew and not part.comments: + last_ew = len(folded.current) - 1 + elif part.comments or part.token_type == 'quoted-string': + # If a comment is involved we can't combine EWs. And if a + # quoted string is involved, it's not worth the effort to + # try to combine them. 
+ last_ew = None + continue + part._fold(folded) + + def cte_encode(self, charset, policy): + res = [] + last_ew = None + is_ew = False + for part in self: + spart = str(part) + try: + spart.encode('us-ascii') + res.append(spart) + except UnicodeEncodeError: + is_ew = True + if last_ew is None: + if not part.comments: + last_ew = len(res) + res.append(part.cte_encode(charset, policy)) + elif not part.has_leading_comment(): + if part[-1].token_type == 'cfws' and part.comments: + remainder = part.pop(-1) + else: + remainder = '' + for i, token in enumerate(part): + if token.token_type == 'bare-quoted-string': + part[i] = UnstructuredTokenList(token[:]) + tl = get_unstructured(''.join(res[last_ew:] + [spart])) + res[last_ew:] = [tl.as_encoded_word(charset)] + if part.comments or (not is_ew and part.token_type == 'quoted-string'): + last_ew = None + return ''.join(res) + +class Word(TokenList): + + token_type = 'word' + + +class CFWSList(WhiteSpaceTokenList): + + token_type = 'cfws' + + def has_leading_comment(self): + return bool(self.comments) + + +class Atom(TokenList): + + token_type = 'atom' + + +class Token(TokenList): + + token_type = 'token' + + +class EncodedWord(TokenList): + + token_type = 'encoded-word' + cte = None + charset = None + lang = None + + @property + def encoded(self): + if self.cte is not None: + return self.cte + _ew.encode(str(self), self.charset) + + + +class QuotedString(TokenList): + + token_type = 'quoted-string' + + @property + def content(self): + for x in self: + if x.token_type == 'bare-quoted-string': + return x.value + + @property + def quoted_value(self): + res = [] + for x in self: + if x.token_type == 'bare-quoted-string': + res.append(str(x)) + else: + res.append(x.value) + return ''.join(res) + + @property + def stripped_value(self): + for token in self: + if token.token_type == 'bare-quoted-string': + return token.value + + +class BareQuotedString(QuotedString): + + token_type = 'bare-quoted-string' + + def __str__(self): + 
return quote_string(''.join(str(x) for x in self)) + + @property + def value(self): + return ''.join(str(x) for x in self) + + +class Comment(WhiteSpaceTokenList): + + token_type = 'comment' + + def __str__(self): + return ''.join(sum([ + ["("], + [self.quote(x) for x in self], + [")"], + ], [])) + + def quote(self, value): + if value.token_type == 'comment': + return str(value) + return str(value).replace('\\', '\\\\').replace( + '(', '\(').replace( + ')', '\)') + + @property + def content(self): + return ''.join(str(x) for x in self) + + @property + def comments(self): + return [self.content] + +class AddressList(TokenList): + + token_type = 'address-list' + + @property + def addresses(self): + return [x for x in self if x.token_type=='address'] + + @property + def mailboxes(self): + return sum((x.mailboxes + for x in self if x.token_type=='address'), []) + + @property + def all_mailboxes(self): + return sum((x.all_mailboxes + for x in self if x.token_type=='address'), []) + + +class Address(TokenList): + + token_type = 'address' + + @property + def display_name(self): + if self[0].token_type == 'group': + return self[0].display_name + + @property + def mailboxes(self): + if self[0].token_type == 'mailbox': + return [self[0]] + elif self[0].token_type == 'invalid-mailbox': + return [] + return self[0].mailboxes + + @property + def all_mailboxes(self): + if self[0].token_type == 'mailbox': + return [self[0]] + elif self[0].token_type == 'invalid-mailbox': + return [self[0]] + return self[0].all_mailboxes + +class MailboxList(TokenList): + + token_type = 'mailbox-list' + + @property + def mailboxes(self): + return [x for x in self if x.token_type=='mailbox'] + + @property + def all_mailboxes(self): + return [x for x in self + if x.token_type in ('mailbox', 'invalid-mailbox')] + + +class GroupList(TokenList): + + token_type = 'group-list' + + @property + def mailboxes(self): + if not self or self[0].token_type != 'mailbox-list': + return [] + return 
self[0].mailboxes + + @property + def all_mailboxes(self): + if not self or self[0].token_type != 'mailbox-list': + return [] + return self[0].all_mailboxes + + +class Group(TokenList): + + token_type = "group" + + @property + def mailboxes(self): + if self[2].token_type != 'group-list': + return [] + return self[2].mailboxes + + @property + def all_mailboxes(self): + if self[2].token_type != 'group-list': + return [] + return self[2].all_mailboxes + + @property + def display_name(self): + return self[0].display_name + + +class NameAddr(TokenList): + + token_type = 'name-addr' + + @property + def display_name(self): + if len(self) == 1: + return None + return self[0].display_name + + @property + def local_part(self): + return self[-1].local_part + + @property + def domain(self): + return self[-1].domain + + @property + def route(self): + return self[-1].route + + @property + def addr_spec(self): + return self[-1].addr_spec + + +class AngleAddr(TokenList): + + token_type = 'angle-addr' + + @property + def local_part(self): + for x in self: + if x.token_type == 'addr-spec': + return x.local_part + + @property + def domain(self): + for x in self: + if x.token_type == 'addr-spec': + return x.domain + + @property + def route(self): + for x in self: + if x.token_type == 'obs-route': + return x.domains + + @property + def addr_spec(self): + for x in self: + if x.token_type == 'addr-spec': + return x.addr_spec + else: + return '<>' + + +class ObsRoute(TokenList): + + token_type = 'obs-route' + + @property + def domains(self): + return [x.domain for x in self if x.token_type == 'domain'] + + +class Mailbox(TokenList): + + token_type = 'mailbox' + + @property + def display_name(self): + if self[0].token_type == 'name-addr': + return self[0].display_name + + @property + def local_part(self): + return self[0].local_part + + @property + def domain(self): + return self[0].domain + + @property + def route(self): + if self[0].token_type == 'name-addr': + return self[0].route + + 
@property + def addr_spec(self): + return self[0].addr_spec + + +class InvalidMailbox(TokenList): + + token_type = 'invalid-mailbox' + + @property + def display_name(self): + return None + + local_part = domain = route = addr_spec = display_name + + +class Domain(TokenList): + + token_type = 'domain' + + @property + def domain(self): + return ''.join(super(Domain, self).value.split()) + + +class DotAtom(TokenList): + + token_type = 'dot-atom' + + +class DotAtomText(TokenList): + + token_type = 'dot-atom-text' + + +class AddrSpec(TokenList): + + token_type = 'addr-spec' + + @property + def local_part(self): + return self[0].local_part + + @property + def domain(self): + if len(self) < 3: + return None + return self[-1].domain + + @property + def value(self): + if len(self) < 3: + return self[0].value + return self[0].value.rstrip()+self[1].value+self[2].value.lstrip() + + @property + def addr_spec(self): + nameset = set(self.local_part) + if len(nameset) > len(nameset-DOT_ATOM_ENDS): + lp = quote_string(self.local_part) + else: + lp = self.local_part + if self.domain is not None: + return lp + '@' + self.domain + return lp + + +class ObsLocalPart(TokenList): + + token_type = 'obs-local-part' + + +class DisplayName(Phrase): + + token_type = 'display-name' + + @property + def display_name(self): + res = TokenList(self) + if res[0].token_type == 'cfws': + res.pop(0) + else: + if res[0][0].token_type == 'cfws': + res[0] = TokenList(res[0][1:]) + if res[-1].token_type == 'cfws': + res.pop() + else: + if res[-1][-1].token_type == 'cfws': + res[-1] = TokenList(res[-1][:-1]) + return res.value + + @property + def value(self): + quote = False + if self.defects: + quote = True + else: + for x in self: + if x.token_type == 'quoted-string': + quote = True + if quote: + pre = post = '' + if self[0].token_type=='cfws' or self[0][0].token_type=='cfws': + pre = ' ' + if self[-1].token_type=='cfws' or self[-1][-1].token_type=='cfws': + post = ' ' + return 
pre+quote_string(self.display_name)+post + else: + return super(DisplayName, self).value + + +class LocalPart(TokenList): + + token_type = 'local-part' + + @property + def value(self): + if self[0].token_type == "quoted-string": + return self[0].quoted_value + else: + return self[0].value + + @property + def local_part(self): + # Strip whitespace from front, back, and around dots. + res = [DOT] + last = DOT + last_is_tl = False + for tok in self[0] + [DOT]: + if tok.token_type == 'cfws': + continue + if (last_is_tl and tok.token_type == 'dot' and + last[-1].token_type == 'cfws'): + res[-1] = TokenList(last[:-1]) + is_tl = isinstance(tok, TokenList) + if (is_tl and last.token_type == 'dot' and + tok[0].token_type == 'cfws'): + res.append(TokenList(tok[1:])) + else: + res.append(tok) + last = res[-1] + last_is_tl = is_tl + res = TokenList(res[1:-1]) + return res.value + + +class DomainLiteral(TokenList): + + token_type = 'domain-literal' + + @property + def domain(self): + return ''.join(super(DomainLiteral, self).value.split()) + + @property + def ip(self): + for x in self: + if x.token_type == 'ptext': + return x.value + + +class MIMEVersion(TokenList): + + token_type = 'mime-version' + major = None + minor = None + + +class Parameter(TokenList): + + token_type = 'parameter' + sectioned = False + extended = False + charset = 'us-ascii' + + @property + def section_number(self): + # Because the first token, the attribute (name) eats CFWS, the second + # token is always the section if there is one. + return self[1].number if self.sectioned else 0 + + @property + def param_value(self): + # This is part of the "handle quoted extended parameters" hack. 
+ for token in self: + if token.token_type == 'value': + return token.stripped_value + if token.token_type == 'quoted-string': + for token in token: + if token.token_type == 'bare-quoted-string': + for token in token: + if token.token_type == 'value': + return token.stripped_value + return '' + + +class InvalidParameter(Parameter): + + token_type = 'invalid-parameter' + + +class Attribute(TokenList): + + token_type = 'attribute' + + @property + def stripped_value(self): + for token in self: + if token.token_type.endswith('attrtext'): + return token.value + +class Section(TokenList): + + token_type = 'section' + number = None + + +class Value(TokenList): + + token_type = 'value' + + @property + def stripped_value(self): + token = self[0] + if token.token_type == 'cfws': + token = self[1] + if token.token_type.endswith( + ('quoted-string', 'attribute', 'extended-attribute')): + return token.stripped_value + return self.value + + +class MimeParameters(TokenList): + + token_type = 'mime-parameters' + + @property + def params(self): + # The RFC specifically states that the ordering of parameters is not + # guaranteed and may be reordered by the transport layer. So we have + # to assume the RFC 2231 pieces can come in any order. However, we + # output them in the order that we first see a given name, which gives + # us a stable __str__. + params = OrderedDict() + for token in self: + if not token.token_type.endswith('parameter'): + continue + if token[0].token_type != 'attribute': + continue + name = token[0].value.strip() + if name not in params: + params[name] = [] + params[name].append((token.section_number, token)) + for name, parts in params.items(): + parts = sorted(parts) + # XXX: there might be more recovery we could do here if, for + # example, this is really a case of a duplicate attribute name. 
+ value_parts = [] + charset = parts[0][1].charset + for i, (section_number, param) in enumerate(parts): + if section_number != i: + param.defects.append(errors.InvalidHeaderDefect( + "inconsistent multipart parameter numbering")) + value = param.param_value + if param.extended: + try: + value = unquote_to_bytes(value) + except UnicodeEncodeError: + # source had surrogate escaped bytes. What we do now + # is a bit of an open question. I'm not sure this is + # the best choice, but it is what the old algorithm did + value = unquote(value, encoding='latin-1') + else: + try: + value = value.decode(charset, 'surrogateescape') + except LookupError: + # XXX: there should really be a custom defect for + # unknown character set to make it easy to find, + # because otherwise unknown charset is a silent + # failure. + value = value.decode('us-ascii', 'surrogateescape') + if utils._has_surrogates(value): + param.defects.append(errors.UndecodableBytesDefect()) + value_parts.append(value) + value = ''.join(value_parts) + yield name, value + + def __str__(self): + params = [] + for name, value in self.params: + if value: + params.append('{}={}'.format(name, quote_string(value))) + else: + params.append(name) + params = '; '.join(params) + return ' ' + params if params else '' + + +class ParameterizedHeaderValue(TokenList): + + @property + def params(self): + for token in reversed(self): + if token.token_type == 'mime-parameters': + return token.params + return {} + + @property + def parts(self): + if self and self[-1].token_type == 'mime-parameters': + # We don't want to start a new line if all of the params don't fit + # after the value, so unwrap the parameter list. 
+ return TokenList(self[:-1] + self[-1]) + return TokenList(self).parts + + +class ContentType(ParameterizedHeaderValue): + + token_type = 'content-type' + maintype = 'text' + subtype = 'plain' + + +class ContentDisposition(ParameterizedHeaderValue): + + token_type = 'content-disposition' + content_disposition = None + + +class ContentTransferEncoding(TokenList): + + token_type = 'content-transfer-encoding' + cte = '7bit' + + +class HeaderLabel(TokenList): + + token_type = 'header-label' + + +class Header(TokenList): + + token_type = 'header' + + def _fold(self, folded): + folded.append(str(self.pop(0))) + folded.lastlen = len(folded.current[0]) + # The first line of the header is different from all others: we don't + # want to start a new object on a new line if it has any fold points in + # it that would allow part of it to be on the first header line. + # Further, if the first fold point would fit on the new line, we want + # to do that, but if it doesn't we want to put it on the first line. + # Folded supports this via the stickyspace attribute. If this + # attribute is not None, it does the special handling. 
+ folded.stickyspace = str(self.pop(0)) if self[0].token_type == 'cfws' else '' + rest = self.pop(0) + if self: + raise ValueError("Malformed Header token list") + rest._fold(folded) + + +# +# Terminal classes and instances +# + +class Terminal(str): + + def __new__(cls, value, token_type): + self = super(Terminal, cls).__new__(cls, value) + self.token_type = token_type + self.defects = [] + return self + + def __repr__(self): + return "{}({})".format(self.__class__.__name__, super(Terminal, self).__repr__()) + + @property + def all_defects(self): + return list(self.defects) + + def _pp(self, indent=''): + return ["{}{}/{}({}){}".format( + indent, + self.__class__.__name__, + self.token_type, + super(Terminal, self).__repr__(), + '' if not self.defects else ' {}'.format(self.defects), + )] + + def cte_encode(self, charset, policy): + value = str(self) + try: + value.encode('us-ascii') + return value + except UnicodeEncodeError: + return _ew.encode(value, charset) + + def pop_trailing_ws(self): + # This terminates the recursion. + return None + + def pop_leading_fws(self): + # This terminates the recursion. 
+ return None + + @property + def comments(self): + return [] + + def has_leading_comment(self): + return False + + def __getnewargs__(self): + return(str(self), self.token_type) + + +class WhiteSpaceTerminal(Terminal): + + @property + def value(self): + return ' ' + + def startswith_fws(self): + return True + + has_fws = True + + +class ValueTerminal(Terminal): + + @property + def value(self): + return self + + def startswith_fws(self): + return False + + has_fws = False + + def as_encoded_word(self, charset): + return _ew.encode(str(self), charset) + + +class EWWhiteSpaceTerminal(WhiteSpaceTerminal): + + @property + def value(self): + return '' + + @property + def encoded(self): + return self[:] + + def __str__(self): + return '' + + has_fws = True + + +# XXX these need to become classes and used as instances so +# that a program can't change them in a parse tree and screw +# up other parse trees. Maybe should have tests for that, too. +DOT = ValueTerminal('.', 'dot') +ListSeparator = ValueTerminal(',', 'list-separator') +RouteComponentMarker = ValueTerminal('@', 'route-component-marker') + +# +# Parser +# + +"""Parse strings according to RFC822/2047/2822/5322 rules. + +This is a stateless parser. Each get_XXX function accepts a string and +returns either a Terminal or a TokenList representing the RFC object named +by the method and a string containing the remaining unparsed characters +from the input. Thus a parser method consumes the next syntactic construct +of a given type and returns a token representing the construct plus the +unparsed remainder of the input string. + +For example, if the first element of a structured header is a 'phrase', +then: + + phrase, value = get_phrase(value) + +returns the complete phrase from the start of the string value, plus any +characters left in the string after the phrase is removed. 
+ +""" + +_wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split +_non_atom_end_matcher = re.compile(r"[^{}]+".format( + ''.join(ATOM_ENDS).replace('\\','\\\\').replace(']','\]'))).match +_non_printable_finder = re.compile(r"[\x00-\x20\x7F]").findall +_non_token_end_matcher = re.compile(r"[^{}]+".format( + ''.join(TOKEN_ENDS).replace('\\','\\\\').replace(']','\]'))).match +_non_attribute_end_matcher = re.compile(r"[^{}]+".format( + ''.join(ATTRIBUTE_ENDS).replace('\\','\\\\').replace(']','\]'))).match +_non_extended_attribute_end_matcher = re.compile(r"[^{}]+".format( + ''.join(EXTENDED_ATTRIBUTE_ENDS).replace( + '\\','\\\\').replace(']','\]'))).match + +def _validate_xtext(xtext): + """If input token contains ASCII non-printables, register a defect.""" + + non_printables = _non_printable_finder(xtext) + if non_printables: + xtext.defects.append(errors.NonPrintableDefect(non_printables)) + if utils._has_surrogates(xtext): + xtext.defects.append(errors.UndecodableBytesDefect( + "Non-ASCII characters found in header token")) + +def _get_ptext_to_endchars(value, endchars): + """Scan printables/quoted-pairs until endchars and return unquoted ptext. + + This function turns a run of qcontent, ccontent-without-comments, or + dtext-with-quoted-printables into a single string by unquoting any + quoted printables. It returns the string, the remaining value, and + a flag that is True iff there were any quoted printables decoded. 
+ + """ + _3to2list = list(_wsp_splitter(value, 1)) + fragment, remainder, = _3to2list[:1] + [_3to2list[1:]] + vchars = [] + escape = False + had_qp = False + for pos in range(len(fragment)): + if fragment[pos] == '\\': + if escape: + escape = False + had_qp = True + else: + escape = True + continue + if escape: + escape = False + elif fragment[pos] in endchars: + break + vchars.append(fragment[pos]) + else: + pos = pos + 1 + return ''.join(vchars), ''.join([fragment[pos:]] + remainder), had_qp + +def _decode_ew_run(value): + """ Decode a run of RFC2047 encoded words. + + _decode_ew_run(value) -> (text, value, defects) + + Scans the supplied value for a run of tokens that look like they are RFC + 2047 encoded words, decodes those words into text according to RFC 2047 + rules (whitespace between encoded words is discarded), and returns the text + and the remaining value (including any leading whitespace on the remaining + value), as well as a list of any defects encountered while decoding. The + input value may not have any leading whitespace. + + """ + res = [] + defects = [] + last_ws = '' + while value: + try: + tok, ws, value = _wsp_splitter(value, 1) + except ValueError: + tok, ws, value = value, '', '' + if not (tok.startswith('=?') and tok.endswith('?=')): + return ''.join(res), last_ws + tok + ws + value, defects + text, charset, lang, new_defects = _ew.decode(tok) + res.append(text) + defects.extend(new_defects) + last_ws = ws + return ''.join(res), last_ws, defects + +def get_fws(value): + """FWS = 1*WSP + + This isn't the RFC definition. We're using fws to represent tokens where + folding can be done, but when we are parsing the *un*folding has already + been done so we don't need to watch out for CRLF. + + """ + newvalue = value.lstrip() + fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws') + return fws, newvalue + +def get_encoded_word(value): + """ encoded-word = "=?" charset "?" encoding "?" 
encoded-text "?=" + + """ + ew = EncodedWord() + if not value.startswith('=?'): + raise errors.HeaderParseError( + "expected encoded word but found {}".format(value)) + _3to2list1 = list(value[2:].split('?=', 1)) + tok, remainder, = _3to2list1[:1] + [_3to2list1[1:]] + if tok == value[2:]: + raise errors.HeaderParseError( + "expected encoded word but found {}".format(value)) + remstr = ''.join(remainder) + if remstr[:2].isdigit(): + _3to2list3 = list(remstr.split('?=', 1)) + rest, remainder, = _3to2list3[:1] + [_3to2list3[1:]] + tok = tok + '?=' + rest + if len(tok.split()) > 1: + ew.defects.append(errors.InvalidHeaderDefect( + "whitespace inside encoded word")) + ew.cte = value + value = ''.join(remainder) + try: + text, charset, lang, defects = _ew.decode('=?' + tok + '?=') + except ValueError: + raise errors.HeaderParseError( + "encoded word format invalid: '{}'".format(ew.cte)) + ew.charset = charset + ew.lang = lang + ew.defects.extend(defects) + while text: + if text[0] in WSP: + token, text = get_fws(text) + ew.append(token) + continue + _3to2list5 = list(_wsp_splitter(text, 1)) + chars, remainder, = _3to2list5[:1] + [_3to2list5[1:]] + vtext = ValueTerminal(chars, 'vtext') + _validate_xtext(vtext) + ew.append(vtext) + text = ''.join(remainder) + return ew, value + +def get_unstructured(value): + """unstructured = (*([FWS] vchar) *WSP) / obs-unstruct + obs-unstruct = *((*LF *CR *(obs-utext) *LF *CR)) / FWS) + obs-utext = %d0 / obs-NO-WS-CTL / LF / CR + + obs-NO-WS-CTL is control characters except WSP/CR/LF. + + So, basically, we have printable runs, plus control characters or nulls in + the obsolete syntax, separated by whitespace. Since RFC 2047 uses the + obsolete syntax in its specification, but requires whitespace on either + side of the encoded words, I can see no reason to need to separate the + non-printable-non-whitespace from the printable runs if they occur, so we + parse this into xtext tokens separated by WSP tokens. 
+ + Because an 'unstructured' value must by definition constitute the entire + value, this 'get' routine does not return a remaining value, only the + parsed TokenList. + + """ + # XXX: but what about bare CR and LF? They might signal the start or + # end of an encoded word. YAGNI for now, since out current parsers + # will never send us strings with bard CR or LF. + + unstructured = UnstructuredTokenList() + while value: + if value[0] in WSP: + token, value = get_fws(value) + unstructured.append(token) + continue + if value.startswith('=?'): + try: + token, value = get_encoded_word(value) + except errors.HeaderParseError: + pass + else: + have_ws = True + if len(unstructured) > 0: + if unstructured[-1].token_type != 'fws': + unstructured.defects.append(errors.InvalidHeaderDefect( + "missing whitespace before encoded word")) + have_ws = False + if have_ws and len(unstructured) > 1: + if unstructured[-2].token_type == 'encoded-word': + unstructured[-1] = EWWhiteSpaceTerminal( + unstructured[-1], 'fws') + unstructured.append(token) + continue + _3to2list7 = list(_wsp_splitter(value, 1)) + tok, remainder, = _3to2list7[:1] + [_3to2list7[1:]] + vtext = ValueTerminal(tok, 'vtext') + _validate_xtext(vtext) + unstructured.append(vtext) + value = ''.join(remainder) + return unstructured + +def get_qp_ctext(value): + """ctext = + + This is not the RFC ctext, since we are handling nested comments in comment + and unquoting quoted-pairs here. We allow anything except the '()' + characters, but if we find any ASCII other than the RFC defined printable + ASCII an NonPrintableDefect is added to the token's defects list. Since + quoted pairs are converted to their unquoted values, what is returned is + a 'ptext' token. In this case it is a WhiteSpaceTerminal, so it's value + is ' '. 
+ + """ + ptext, value, _ = _get_ptext_to_endchars(value, '()') + ptext = WhiteSpaceTerminal(ptext, 'ptext') + _validate_xtext(ptext) + return ptext, value + +def get_qcontent(value): + """qcontent = qtext / quoted-pair + + We allow anything except the DQUOTE character, but if we find any ASCII + other than the RFC defined printable ASCII an NonPrintableDefect is + added to the token's defects list. Any quoted pairs are converted to their + unquoted values, so what is returned is a 'ptext' token. In this case it + is a ValueTerminal. + + """ + ptext, value, _ = _get_ptext_to_endchars(value, '"') + ptext = ValueTerminal(ptext, 'ptext') + _validate_xtext(ptext) + return ptext, value + +def get_atext(value): + """atext = + + We allow any non-ATOM_ENDS in atext, but add an InvalidATextDefect to + the token's defects list if we find non-atext characters. + """ + m = _non_atom_end_matcher(value) + if not m: + raise errors.HeaderParseError( + "expected atext but found '{}'".format(value)) + atext = m.group() + value = value[len(atext):] + atext = ValueTerminal(atext, 'atext') + _validate_xtext(atext) + return atext, value + +def get_bare_quoted_string(value): + """bare-quoted-string = DQUOTE *([FWS] qcontent) [FWS] DQUOTE + + A quoted-string without the leading or trailing white space. Its + value is the text between the quote marks, with whitespace + preserved and quoted pairs decoded. 
+ """ + if value[0] != '"': + raise errors.HeaderParseError( + "expected '\"' but found '{}'".format(value)) + bare_quoted_string = BareQuotedString() + value = value[1:] + while value and value[0] != '"': + if value[0] in WSP: + token, value = get_fws(value) + else: + token, value = get_qcontent(value) + bare_quoted_string.append(token) + if not value: + bare_quoted_string.defects.append(errors.InvalidHeaderDefect( + "end of header inside quoted string")) + return bare_quoted_string, value + return bare_quoted_string, value[1:] + +def get_comment(value): + """comment = "(" *([FWS] ccontent) [FWS] ")" + ccontent = ctext / quoted-pair / comment + + We handle nested comments here, and quoted-pair in our qp-ctext routine. + """ + if value and value[0] != '(': + raise errors.HeaderParseError( + "expected '(' but found '{}'".format(value)) + comment = Comment() + value = value[1:] + while value and value[0] != ")": + if value[0] in WSP: + token, value = get_fws(value) + elif value[0] == '(': + token, value = get_comment(value) + else: + token, value = get_qp_ctext(value) + comment.append(token) + if not value: + comment.defects.append(errors.InvalidHeaderDefect( + "end of header inside comment")) + return comment, value + return comment, value[1:] + +def get_cfws(value): + """CFWS = (1*([FWS] comment) [FWS]) / FWS + + """ + cfws = CFWSList() + while value and value[0] in CFWS_LEADER: + if value[0] in WSP: + token, value = get_fws(value) + else: + token, value = get_comment(value) + cfws.append(token) + return cfws, value + +def get_quoted_string(value): + """quoted-string = [CFWS] [CFWS] + + 'bare-quoted-string' is an intermediate class defined by this + parser and not by the RFC grammar. It is the quoted string + without any attached CFWS. 
    """
    quoted_string = QuotedString()
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        quoted_string.append(token)
    token, value = get_bare_quoted_string(value)
    quoted_string.append(token)
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        quoted_string.append(token)
    return quoted_string, value

def get_atom(value):
    """atom = [CFWS] 1*atext [CFWS]

    """
    atom = Atom()
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        atom.append(token)
    if value and value[0] in ATOM_ENDS:
        raise errors.HeaderParseError(
            "expected atom but found '{}'".format(value))
    token, value = get_atext(value)
    atom.append(token)
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        atom.append(token)
    return atom, value

def get_dot_atom_text(value):
    """ dot-text = 1*atext *("." 1*atext)

    """
    dot_atom_text = DotAtomText()
    if not value or value[0] in ATOM_ENDS:
        raise errors.HeaderParseError("expected atom at a start of "
            "dot-atom-text but found '{}'".format(value))
    # Alternate runs of atext and single dots; a dot may not be last.
    while value and value[0] not in ATOM_ENDS:
        token, value = get_atext(value)
        dot_atom_text.append(token)
        if value and value[0] == '.':
            dot_atom_text.append(DOT)
            value = value[1:]
    if dot_atom_text[-1] is DOT:
        raise errors.HeaderParseError("expected atom at end of dot-atom-text "
            "but found '{}'".format('.'+value))
    return dot_atom_text, value

def get_dot_atom(value):
    """ dot-atom = [CFWS] dot-atom-text [CFWS]

    """
    dot_atom = DotAtom()
    if value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        dot_atom.append(token)
    token, value = get_dot_atom_text(value)
    dot_atom.append(token)
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        dot_atom.append(token)
    return dot_atom, value

def get_word(value):
    """word = atom / quoted-string

    Either atom or quoted-string may start with CFWS.
    We have to peel off this
    CFWS first to determine which type of word to parse.  Afterward we splice
    the leading CFWS, if any, into the parsed sub-token.

    If neither an atom or a quoted-string is found before the next special, a
    HeaderParseError is raised.

    The token returned is either an Atom or a QuotedString, as appropriate.
    This means the 'word' level of the formal grammar is not represented in
    the parse tree; this is because having that extra layer when manipulating
    the parse tree is more confusing than it is helpful.

    """
    if value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
    else:
        leader = None
    if value[0] == '"':
        token, value = get_quoted_string(value)
    elif value[0] in SPECIALS:
        raise errors.HeaderParseError("Expected 'atom' or 'quoted-string' "
                                      "but found '{}'".format(value))
    else:
        token, value = get_atom(value)
    if leader is not None:
        # Splice the CFWS we peeled off back onto the front of the token.
        token[:0] = [leader]
    return token, value

def get_phrase(value):
    """ phrase = 1*word / obs-phrase
        obs-phrase = word *(word / "." / CFWS)

    This means a phrase can be a sequence of words, periods, and CFWS in any
    order as long as it starts with at least one word.  If anything other
    than words is detected, an ObsoleteHeaderDefect is added to the token's
    defect list.  We also accept a phrase that starts with CFWS followed by
    a dot; this is registered as an InvalidHeaderDefect, since it is not
    supported by even the obsolete grammar.
    """
    phrase = Phrase()
    try:
        token, value = get_word(value)
        phrase.append(token)
    except errors.HeaderParseError:
        phrase.defects.append(errors.InvalidHeaderDefect(
            "phrase does not start with word"))
    while value and value[0] not in PHRASE_ENDS:
        if value[0] == '.':
            # Periods inside a phrase are only valid in the obsolete grammar.
            phrase.append(DOT)
            phrase.defects.append(errors.ObsoleteHeaderDefect(
                "period in 'phrase'"))
            value = value[1:]
        else:
            try:
                token, value = get_word(value)
            except errors.HeaderParseError:
                if value[0] in CFWS_LEADER:
                    # CFWS with no following word is obsolete syntax.
                    token, value = get_cfws(value)
                    phrase.defects.append(errors.ObsoleteHeaderDefect(
                        "comment found without atom"))
                else:
                    raise
            phrase.append(token)
    return phrase, value

def get_local_part(value):
    """ local-part = dot-atom / quoted-string / obs-local-part

    """
    local_part = LocalPart()
    leader = None
    if value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
    if not value:
        raise errors.HeaderParseError(
            "expected local-part but found '{}'".format(value))
    try:
        token, value = get_dot_atom(value)
    except errors.HeaderParseError:
        try:
            token, value = get_word(value)
        except errors.HeaderParseError:
            if value[0] != '\\' and value[0] in PHRASE_ENDS:
                raise
            # Nothing parseable yet; start with an empty token and let the
            # obs-local-part pass below pick up the pieces.
            token = TokenList()
    if leader is not None:
        token[:0] = [leader]
    local_part.append(token)
    if value and (value[0] == '\\' or value[0] not in PHRASE_ENDS):
        # Leftover text means this wasn't a simple dot-atom/quoted-string;
        # reparse everything consumed so far as an obs-local-part.
        obs_local_part, value = get_obs_local_part(str(local_part) + value)
        if obs_local_part.token_type == 'invalid-obs-local-part':
            local_part.defects.append(errors.InvalidHeaderDefect(
                "local-part is not dot-atom, quoted-string, or obs-local-part"))
        else:
            local_part.defects.append(errors.ObsoleteHeaderDefect(
                "local-part is not a dot-atom (contains CFWS)"))
        local_part[0] = obs_local_part
    try:
        local_part.value.encode('ascii')
    except UnicodeEncodeError:
        local_part.defects.append(errors.NonASCIILocalPartDefect(
                "local-part contains non-ASCII characters)"))
    return local_part, value

def get_obs_local_part(value):
    """ obs-local-part = word *("." word)
    """
    obs_local_part = ObsLocalPart()
    last_non_ws_was_dot = False
    while value and (value[0] == '\\' or value[0] not in PHRASE_ENDS):
        if value[0] == '.':
            if last_non_ws_was_dot:
                obs_local_part.defects.append(errors.InvalidHeaderDefect(
                    "invalid repeated '.'"))
            obs_local_part.append(DOT)
            last_non_ws_was_dot = True
            value = value[1:]
            continue
        elif value[0] == '\\':
            # A bare backslash is never valid outside a quoted-string.
            obs_local_part.append(ValueTerminal(value[0],
                                                'misplaced-special'))
            value = value[1:]
            obs_local_part.defects.append(errors.InvalidHeaderDefect(
                "'\\' character outside of quoted-string/ccontent"))
            last_non_ws_was_dot = False
            continue
        if obs_local_part and obs_local_part[-1].token_type != 'dot':
            obs_local_part.defects.append(errors.InvalidHeaderDefect(
                "missing '.' between words"))
        try:
            token, value = get_word(value)
            last_non_ws_was_dot = False
        except errors.HeaderParseError:
            if value[0] not in CFWS_LEADER:
                raise
            token, value = get_cfws(value)
        obs_local_part.append(token)
    # A leading or trailing dot (possibly behind CFWS) is invalid even in
    # the obsolete grammar.
    if (obs_local_part[0].token_type == 'dot' or
            obs_local_part[0].token_type == 'cfws' and
            obs_local_part[1].token_type == 'dot'):
        obs_local_part.defects.append(errors.InvalidHeaderDefect(
            "Invalid leading '.' in local part"))
    if (obs_local_part[-1].token_type == 'dot' or
            obs_local_part[-1].token_type == 'cfws' and
            obs_local_part[-2].token_type == 'dot'):
        obs_local_part.defects.append(errors.InvalidHeaderDefect(
            "Invalid trailing '.' in local part"))
    if obs_local_part.defects:
        obs_local_part.token_type = 'invalid-obs-local-part'
    return obs_local_part, value

def get_dtext(value):
    """ dtext = 1*(any printable character except '[', ']' or '\\') / obs-dtext
        obs-dtext = obs-NO-WS-CTL / quoted-pair

    We allow anything except the excluded characters, but if we find any
    ASCII other than the RFC defined printable ASCII a NonPrintableDefect is
    added to the token's defects list.
    Quoted pairs are converted to their
    unquoted values, so what is returned is a ptext token, in this case a
    ValueTerminal.  If there were quoted-printables, an ObsoleteHeaderDefect
    is added to the returned token's defect list.

    """
    ptext, value, had_qp = _get_ptext_to_endchars(value, '[]')
    ptext = ValueTerminal(ptext, 'ptext')
    if had_qp:
        ptext.defects.append(errors.ObsoleteHeaderDefect(
            "quoted printable found in domain-literal"))
    _validate_xtext(ptext)
    return ptext, value

def _check_for_early_dl_end(value, domain_literal):
    # Helper for get_domain_literal: if input ran out, record the defect,
    # synthesize the missing ']' terminal, and report True so the caller
    # can bail out.
    if value:
        return False
    domain_literal.append(errors.InvalidHeaderDefect(
        "end of input inside domain-literal"))
    domain_literal.append(ValueTerminal(']', 'domain-literal-end'))
    return True

def get_domain_literal(value):
    """ domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]

    """
    domain_literal = DomainLiteral()
    if value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        domain_literal.append(token)
    if not value:
        raise errors.HeaderParseError("expected domain-literal")
    if value[0] != '[':
        raise errors.HeaderParseError("expected '[' at start of domain-literal "
                "but found '{}'".format(value))
    value = value[1:]
    if _check_for_early_dl_end(value, domain_literal):
        return domain_literal, value
    domain_literal.append(ValueTerminal('[', 'domain-literal-start'))
    if value[0] in WSP:
        token, value = get_fws(value)
        domain_literal.append(token)
    token, value = get_dtext(value)
    domain_literal.append(token)
    if _check_for_early_dl_end(value, domain_literal):
        return domain_literal, value
    if value[0] in WSP:
        token, value = get_fws(value)
        domain_literal.append(token)
    if _check_for_early_dl_end(value, domain_literal):
        return domain_literal, value
    if value[0] != ']':
        raise errors.HeaderParseError("expected ']' at end of domain-literal "
                "but found '{}'".format(value))
    domain_literal.append(ValueTerminal(']', 'domain-literal-end'))
    value = value[1:]
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        domain_literal.append(token)
    return domain_literal, value

def get_domain(value):
    """ domain = dot-atom / domain-literal / obs-domain
        obs-domain = atom *("." atom)

    """
    domain = Domain()
    leader = None
    if value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
    if not value:
        raise errors.HeaderParseError(
            "expected domain but found '{}'".format(value))
    if value[0] == '[':
        token, value = get_domain_literal(value)
        if leader is not None:
            token[:0] = [leader]
        domain.append(token)
        return domain, value
    try:
        token, value = get_dot_atom(value)
    except errors.HeaderParseError:
        token, value = get_atom(value)
    if leader is not None:
        token[:0] = [leader]
    domain.append(token)
    if value and value[0] == '.':
        # Trailing dots mean the obsolete atom *("." atom) form.
        domain.defects.append(errors.ObsoleteHeaderDefect(
            "domain is not a dot-atom (contains CFWS)"))
        if domain[0].token_type == 'dot-atom':
            # Flatten the dot-atom so the obs-domain atoms can be appended.
            domain[:] = domain[0]
        while value and value[0] == '.':
            domain.append(DOT)
            token, value = get_atom(value[1:])
            domain.append(token)
    return domain, value

def get_addr_spec(value):
    """ addr-spec = local-part "@" domain

    """
    addr_spec = AddrSpec()
    token, value = get_local_part(value)
    addr_spec.append(token)
    if not value or value[0] != '@':
        # NOTE(review): "add-spec" below looks like a typo for "addr-spec";
        # kept as-is since it is a runtime message.
        addr_spec.defects.append(errors.InvalidHeaderDefect(
            "add-spec local part with no domain"))
        return addr_spec, value
    addr_spec.append(ValueTerminal('@', 'address-at-symbol'))
    token, value = get_domain(value[1:])
    addr_spec.append(token)
    return addr_spec, value

def get_obs_route(value):
    """ obs-route = obs-domain-list ":"
        obs-domain-list = *(CFWS / ",") "@" domain *("," [CFWS] ["@" domain])

    Returns an obs-route token with the appropriate sub-tokens (that is,
    there is no obs-domain-list in the parse tree).
    """
    obs_route = ObsRoute()
    # Skip any leading CFWS/',' noise allowed by the obsolete grammar.
    while value and (value[0] == ',' or value[0] in CFWS_LEADER):
        if value[0] in CFWS_LEADER:
            token, value = get_cfws(value)
            obs_route.append(token)
        elif value[0] == ',':
            obs_route.append(ListSeparator)
            value = value[1:]
    if not value or value[0] != '@':
        raise errors.HeaderParseError(
            "expected obs-route domain but found '{}'".format(value))
    obs_route.append(RouteComponentMarker)
    token, value = get_domain(value[1:])
    obs_route.append(token)
    while value and value[0] == ',':
        obs_route.append(ListSeparator)
        value = value[1:]
        if not value:
            break
        if value[0] in CFWS_LEADER:
            token, value = get_cfws(value)
            obs_route.append(token)
        if value[0] == '@':
            obs_route.append(RouteComponentMarker)
            token, value = get_domain(value[1:])
            obs_route.append(token)
    if not value:
        raise errors.HeaderParseError("end of header while parsing obs-route")
    if value[0] != ':':
        raise errors.HeaderParseError( "expected ':' marking end of "
            "obs-route but found '{}'".format(value))
    obs_route.append(ValueTerminal(':', 'end-of-obs-route-marker'))
    return obs_route, value[1:]

def get_angle_addr(value):
    """ angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr
        obs-angle-addr = [CFWS] "<" obs-route addr-spec ">" [CFWS]

    """
    angle_addr = AngleAddr()
    if value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        angle_addr.append(token)
    if not value or value[0] != '<':
        raise errors.HeaderParseError(
            "expected angle-addr but found '{}'".format(value))
    angle_addr.append(ValueTerminal('<', 'angle-addr-start'))
    value = value[1:]
    # Although it is not legal per RFC5322, SMTP uses '<>' in certain
    # circumstances.
    if value[0] == '>':
        angle_addr.append(ValueTerminal('>', 'angle-addr-end'))
        angle_addr.defects.append(errors.InvalidHeaderDefect(
            "null addr-spec in angle-addr"))
        value = value[1:]
        return angle_addr, value
    try:
        token, value = get_addr_spec(value)
    except errors.HeaderParseError:
        try:
            token, value = get_obs_route(value)
            angle_addr.defects.append(errors.ObsoleteHeaderDefect(
                "obsolete route specification in angle-addr"))
        except errors.HeaderParseError:
            raise errors.HeaderParseError(
                "expected addr-spec or obs-route but found '{}'".format(value))
        angle_addr.append(token)
        # The obs-route must still be followed by an addr-spec.
        token, value = get_addr_spec(value)
    angle_addr.append(token)
    if value and value[0] == '>':
        value = value[1:]
    else:
        angle_addr.defects.append(errors.InvalidHeaderDefect(
            "missing trailing '>' on angle-addr"))
    angle_addr.append(ValueTerminal('>', 'angle-addr-end'))
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        angle_addr.append(token)
    return angle_addr, value

def get_display_name(value):
    """ display-name = phrase

    Because this is simply a name-rule, we don't return a display-name
    token containing a phrase, but rather a display-name token with
    the content of the phrase.

    """
    display_name = DisplayName()
    token, value = get_phrase(value)
    display_name.extend(token[:])
    display_name.defects = token.defects[:]
    return display_name, value


def get_name_addr(value):
    """ name-addr = [display-name] angle-addr

    """
    name_addr = NameAddr()
    # Both the optional display name and the angle-addr can start with cfws.
    leader = None
    if value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
        if not value:
            raise errors.HeaderParseError(
                "expected name-addr but found '{}'".format(leader))
    if value[0] != '<':
        if value[0] in PHRASE_ENDS:
            raise errors.HeaderParseError(
                "expected name-addr but found '{}'".format(value))
        token, value = get_display_name(value)
        if not value:
            raise errors.HeaderParseError(
                "expected name-addr but found '{}'".format(token))
        if leader is not None:
            # Attach the leading CFWS to the display name's first sub-token.
            token[0][:0] = [leader]
            leader = None
        name_addr.append(token)
    token, value = get_angle_addr(value)
    if leader is not None:
        token[:0] = [leader]
    name_addr.append(token)
    return name_addr, value

def get_mailbox(value):
    """ mailbox = name-addr / addr-spec

    """
    # The only way to figure out if we are dealing with a name-addr or an
    # addr-spec is to try parsing each one.
    mailbox = Mailbox()
    try:
        token, value = get_name_addr(value)
    except errors.HeaderParseError:
        try:
            token, value = get_addr_spec(value)
        except errors.HeaderParseError:
            raise errors.HeaderParseError(
                "expected mailbox but found '{}'".format(value))
    if any(isinstance(x, errors.InvalidHeaderDefect)
           for x in token.all_defects):
        mailbox.token_type = 'invalid-mailbox'
    mailbox.append(token)
    return mailbox, value

def get_invalid_mailbox(value, endchars):
    """ Read everything up to one of the chars in endchars.

    This is outside the formal grammar.  The InvalidMailbox TokenList that is
    returned acts like a Mailbox, but the data attributes are None.

    """
    invalid_mailbox = InvalidMailbox()
    while value and value[0] not in endchars:
        if value[0] in PHRASE_ENDS:
            invalid_mailbox.append(ValueTerminal(value[0],
                                                 'misplaced-special'))
            value = value[1:]
        else:
            token, value = get_phrase(value)
            invalid_mailbox.append(token)
    return invalid_mailbox, value

def get_mailbox_list(value):
    """ mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list
        obs-mbox-list = *([CFWS] ",") mailbox *("," [mailbox / CFWS])

    For this routine we go outside the formal grammar in order to improve
    error handling.  We recognize the end of the mailbox list only at the end
    of the value or at a ';' (the group terminator).  This is so that we can
    turn invalid mailboxes into InvalidMailbox tokens and continue parsing
    any remaining valid mailboxes.  We also allow all mailbox entries to be
    null, and this condition is handled appropriately at a higher level.

    """
    mailbox_list = MailboxList()
    while value and value[0] != ';':
        try:
            token, value = get_mailbox(value)
            mailbox_list.append(token)
        except errors.HeaderParseError:
            # Error recovery: absorb CFWS/empty entries (obsolete syntax) or
            # sweep unparseable text into an InvalidMailbox token.
            leader = None
            if value[0] in CFWS_LEADER:
                leader, value = get_cfws(value)
                if not value or value[0] in ',;':
                    mailbox_list.append(leader)
                    mailbox_list.defects.append(errors.ObsoleteHeaderDefect(
                        "empty element in mailbox-list"))
                else:
                    token, value = get_invalid_mailbox(value, ',;')
                    if leader is not None:
                        token[:0] = [leader]
                    mailbox_list.append(token)
                    mailbox_list.defects.append(errors.InvalidHeaderDefect(
                        "invalid mailbox in mailbox-list"))
            elif value[0] == ',':
                mailbox_list.defects.append(errors.ObsoleteHeaderDefect(
                    "empty element in mailbox-list"))
            else:
                token, value = get_invalid_mailbox(value, ',;')
                if leader is not None:
                    token[:0] = [leader]
                mailbox_list.append(token)
                mailbox_list.defects.append(errors.InvalidHeaderDefect(
                    "invalid mailbox in mailbox-list"))
        if value and value[0] not in ',;':
            # Crap after mailbox; treat it as an invalid mailbox.
            # The mailbox info will still be available.
            mailbox = mailbox_list[-1]
            mailbox.token_type = 'invalid-mailbox'
            token, value = get_invalid_mailbox(value, ',;')
            mailbox.extend(token)
            mailbox_list.defects.append(errors.InvalidHeaderDefect(
                "invalid mailbox in mailbox-list"))
        if value and value[0] == ',':
            mailbox_list.append(ListSeparator)
            value = value[1:]
    return mailbox_list, value


def get_group_list(value):
    """ group-list = mailbox-list / CFWS / obs-group-list
        obs-group-list = 1*([CFWS] ",") [CFWS]

    """
    group_list = GroupList()
    if not value:
        group_list.defects.append(errors.InvalidHeaderDefect(
            "end of header before group-list"))
        return group_list, value
    leader = None
    if value and value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
        if not value:
            # This should never happen in email parsing, since CFWS-only is a
            # legal alternative to group-list in a group, which is the only
            # place group-list appears.
            group_list.defects.append(errors.InvalidHeaderDefect(
                "end of header in group-list"))
            group_list.append(leader)
            return group_list, value
        if value[0] == ';':
            group_list.append(leader)
            return group_list, value
    token, value = get_mailbox_list(value)
    if len(token.all_mailboxes) == 0:
        if leader is not None:
            group_list.append(leader)
        group_list.extend(token)
        group_list.defects.append(errors.ObsoleteHeaderDefect(
            "group-list with empty entries"))
        return group_list, value
    if leader is not None:
        token[:0] = [leader]
    group_list.append(token)
    return group_list, value

def get_group(value):
    """ group = display-name ":" [group-list] ";" [CFWS]

    """
    group = Group()
    token, value = get_display_name(value)
    if not value or value[0] != ':':
        raise errors.HeaderParseError("expected ':' at end of group "
            "display name but found '{}'".format(value))
    group.append(token)
    group.append(ValueTerminal(':', 'group-display-name-terminator'))
    value = value[1:]
value and value[0] == ';': + group.append(ValueTerminal(';', 'group-terminator')) + return group, value[1:] + token, value = get_group_list(value) + group.append(token) + if not value: + group.defects.append(errors.InvalidHeaderDefect( + "end of header in group")) + if value[0] != ';': + raise errors.HeaderParseError( + "expected ';' at end of group but found {}".format(value)) + group.append(ValueTerminal(';', 'group-terminator')) + value = value[1:] + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + group.append(token) + return group, value + +def get_address(value): + """ address = mailbox / group + + Note that counter-intuitively, an address can be either a single address or + a list of addresses (a group). This is why the returned Address object has + a 'mailboxes' attribute which treats a single address as a list of length + one. When you need to differentiate between to two cases, extract the single + element, which is either a mailbox or a group token. + + """ + # The formal grammar isn't very helpful when parsing an address. mailbox + # and group, especially when allowing for obsolete forms, start off very + # similarly. It is only when you reach one of @, <, or : that you know + # what you've got. So, we try each one in turn, starting with the more + # likely of the two. We could perhaps make this more efficient by looking + # for a phrase and then branching based on the next character, but that + # would be a premature optimization. 
+ address = Address() + try: + token, value = get_group(value) + except errors.HeaderParseError: + try: + token, value = get_mailbox(value) + except errors.HeaderParseError: + raise errors.HeaderParseError( + "expected address but found '{}'".format(value)) + address.append(token) + return address, value + +def get_address_list(value): + """ address_list = (address *("," address)) / obs-addr-list + obs-addr-list = *([CFWS] ",") address *("," [address / CFWS]) + + We depart from the formal grammar here by continuing to parse until the end + of the input, assuming the input to be entirely composed of an + address-list. This is always true in email parsing, and allows us + to skip invalid addresses to parse additional valid ones. + + """ + address_list = AddressList() + while value: + try: + token, value = get_address(value) + address_list.append(token) + except errors.HeaderParseError as err: + leader = None + if value[0] in CFWS_LEADER: + leader, value = get_cfws(value) + if not value or value[0] == ',': + address_list.append(leader) + address_list.defects.append(errors.ObsoleteHeaderDefect( + "address-list entry with no content")) + else: + token, value = get_invalid_mailbox(value, ',') + if leader is not None: + token[:0] = [leader] + address_list.append(Address([token])) + address_list.defects.append(errors.InvalidHeaderDefect( + "invalid address in address-list")) + elif value[0] == ',': + address_list.defects.append(errors.ObsoleteHeaderDefect( + "empty element in address-list")) + else: + token, value = get_invalid_mailbox(value, ',') + if leader is not None: + token[:0] = [leader] + address_list.append(Address([token])) + address_list.defects.append(errors.InvalidHeaderDefect( + "invalid address in address-list")) + if value and value[0] != ',': + # Crap after address; treat it as an invalid mailbox. + # The mailbox info will still be available. 
+ mailbox = address_list[-1][0] + mailbox.token_type = 'invalid-mailbox' + token, value = get_invalid_mailbox(value, ',') + mailbox.extend(token) + address_list.defects.append(errors.InvalidHeaderDefect( + "invalid address in address-list")) + if value: # Must be a , at this point. + address_list.append(ValueTerminal(',', 'list-separator')) + value = value[1:] + return address_list, value + +# +# XXX: As I begin to add additional header parsers, I'm realizing we probably +# have two level of parser routines: the get_XXX methods that get a token in +# the grammar, and parse_XXX methods that parse an entire field value. So +# get_address_list above should really be a parse_ method, as probably should +# be get_unstructured. +# + +def parse_mime_version(value): + """ mime-version = [CFWS] 1*digit [CFWS] "." [CFWS] 1*digit [CFWS] + + """ + # The [CFWS] is implicit in the RFC 2045 BNF. + # XXX: This routine is a bit verbose, should factor out a get_int method. + mime_version = MIMEVersion() + if not value: + mime_version.defects.append(errors.HeaderMissingRequiredValue( + "Missing MIME version number (eg: 1.0)")) + return mime_version + if value[0] in CFWS_LEADER: + token, value = get_cfws(value) + mime_version.append(token) + if not value: + mime_version.defects.append(errors.HeaderMissingRequiredValue( + "Expected MIME version number but found only CFWS")) + digits = '' + while value and value[0] != '.' 
    # Collect the major version digits (anything up to '.' or CFWS).
    while value and value[0] != '.' and value[0] not in CFWS_LEADER:
        digits += value[0]
        value = value[1:]
    if not digits.isdigit():
        mime_version.defects.append(errors.InvalidHeaderDefect(
            "Expected MIME major version number but found {!r}".format(digits)))
        mime_version.append(ValueTerminal(digits, 'xtext'))
    else:
        mime_version.major = int(digits)
        mime_version.append(ValueTerminal(digits, 'digits'))
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        mime_version.append(token)
    if not value or value[0] != '.':
        if mime_version.major is not None:
            mime_version.defects.append(errors.InvalidHeaderDefect(
                "Incomplete MIME version; found only major number"))
        if value:
            mime_version.append(ValueTerminal(value, 'xtext'))
        return mime_version
    mime_version.append(ValueTerminal('.', 'version-separator'))
    value = value[1:]
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        mime_version.append(token)
    if not value:
        if mime_version.major is not None:
            mime_version.defects.append(errors.InvalidHeaderDefect(
                "Incomplete MIME version; found only major number"))
        return mime_version
    # Collect the minor version digits.
    digits = ''
    while value and value[0] not in CFWS_LEADER:
        digits += value[0]
        value = value[1:]
    if not digits.isdigit():
        mime_version.defects.append(errors.InvalidHeaderDefect(
            "Expected MIME minor version number but found {!r}".format(digits)))
        mime_version.append(ValueTerminal(digits, 'xtext'))
    else:
        mime_version.minor = int(digits)
        mime_version.append(ValueTerminal(digits, 'digits'))
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        mime_version.append(token)
    if value:
        mime_version.defects.append(errors.InvalidHeaderDefect(
            "Excess non-CFWS text after MIME version"))
        mime_version.append(ValueTerminal(value, 'xtext'))
    return mime_version

def get_invalid_parameter(value):
    """ Read everything up to the next ';'.

    This is outside the formal grammar.
    The InvalidParameter TokenList that is
    returned acts like a Parameter, but the data attributes are None.

    """
    invalid_parameter = InvalidParameter()
    while value and value[0] != ';':
        if value[0] in PHRASE_ENDS:
            invalid_parameter.append(ValueTerminal(value[0],
                                                   'misplaced-special'))
            value = value[1:]
        else:
            token, value = get_phrase(value)
            invalid_parameter.append(token)
    return invalid_parameter, value

def get_ttext(value):
    """ttext = 1*(any non-TOKEN_ENDS character)

    We allow any non-TOKEN_ENDS in ttext, but add defects to the token's
    defects list if we find non-ttext characters.  We also register defects
    for *any* non-printables even though the RFC doesn't exclude all of them,
    because we follow the spirit of RFC 5322.

    """
    m = _non_token_end_matcher(value)
    if not m:
        raise errors.HeaderParseError(
            "expected ttext but found '{}'".format(value))
    ttext = m.group()
    value = value[len(ttext):]
    ttext = ValueTerminal(ttext, 'ttext')
    _validate_xtext(ttext)
    return ttext, value

def get_token(value):
    """token = [CFWS] 1*ttext [CFWS]

    The RFC equivalent of ttext is any US-ASCII chars except space, ctls, or
    tspecials.  We also exclude tabs even though the RFC doesn't.

    The RFC implies the CFWS but is not explicit about it in the BNF.

    """
    mtoken = Token()
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        mtoken.append(token)
    if value and value[0] in TOKEN_ENDS:
        raise errors.HeaderParseError(
            "expected token but found '{}'".format(value))
    token, value = get_ttext(value)
    mtoken.append(token)
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        mtoken.append(token)
    return mtoken, value

def get_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character)

    We allow any non-ATTRIBUTE_ENDS in attrtext, but add defects to the
    token's defects list if we find non-attrtext characters.  We also
    register defects for *any* non-printables even though the RFC doesn't
    exclude all of them, because we follow the spirit of RFC 5322.

    """
    m = _non_attribute_end_matcher(value)
    if not m:
        raise errors.HeaderParseError(
            "expected attrtext but found {!r}".format(value))
    attrtext = m.group()
    value = value[len(attrtext):]
    attrtext = ValueTerminal(attrtext, 'attrtext')
    _validate_xtext(attrtext)
    return attrtext, value

def get_attribute(value):
    """ [CFWS] 1*attrtext [CFWS]

    This version of the BNF makes the CFWS explicit, and as usual we use a
    value terminal for the actual run of characters.  The RFC equivalent of
    attrtext is the token characters, with the subtraction of '*', "'", and
    '%'.  We include tab in the excluded set just as we do for token.

    """
    attribute = Attribute()
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        attribute.append(token)
    if value and value[0] in ATTRIBUTE_ENDS:
        raise errors.HeaderParseError(
            "expected token but found '{}'".format(value))
    token, value = get_attrtext(value)
    attribute.append(token)
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        attribute.append(token)
    return attribute, value

def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    This is a special parsing routine so that we get a value that
    includes % escapes as a single string (which we decode as a single
    string later).
+ + """ + m = _non_extended_attribute_end_matcher(value) + if not m: + raise errors.HeaderParseError( + "expected extended attrtext but found {!r}".format(value)) + attrtext = m.group() + value = value[len(attrtext):] + attrtext = ValueTerminal(attrtext, 'extended-attrtext') + _validate_xtext(attrtext) + return attrtext, value + +def get_extended_attribute(value): + """ [CFWS] 1*extended_attrtext [CFWS] + + This is like the non-extended version except we allow % characters, so that + we can pick up an encoded value as a single string. + + """ + # XXX: should we have an ExtendedAttribute TokenList? + attribute = Attribute() + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + attribute.append(token) + if value and value[0] in EXTENDED_ATTRIBUTE_ENDS: + raise errors.HeaderParseError( + "expected token but found '{}'".format(value)) + token, value = get_extended_attrtext(value) + attribute.append(token) + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + attribute.append(token) + return attribute, value + +def get_section(value): + """ '*' digits + + The formal BNF is more complicated because leading 0s are not allowed. We + check for that and add a defect. We also assume no CFWS is allowed between + the '*' and the digits, though the RFC is not crystal clear on that. + The caller should already have dealt with leading CFWS. 
+ + """ + section = Section() + if not value or value[0] != '*': + raise errors.HeaderParseError("Expected section but found {}".format( + value)) + section.append(ValueTerminal('*', 'section-marker')) + value = value[1:] + if not value or not value[0].isdigit(): + raise errors.HeaderParseError("Expected section number but " + "found {}".format(value)) + digits = '' + while value and value[0].isdigit(): + digits += value[0] + value = value[1:] + if digits[0] == '0' and digits != '0': + section.defects.append(errors.InvalidHeaderError("section number" + "has an invalid leading 0")) + section.number = int(digits) + section.append(ValueTerminal(digits, 'digits')) + return section, value + + +def get_value(value): + """ quoted-string / attribute + + """ + v = Value() + if not value: + raise errors.HeaderParseError("Expected value but found end of string") + leader = None + if value[0] in CFWS_LEADER: + leader, value = get_cfws(value) + if not value: + raise errors.HeaderParseError("Expected value but found " + "only {}".format(leader)) + if value[0] == '"': + token, value = get_quoted_string(value) + else: + token, value = get_extended_attribute(value) + if leader is not None: + token[:0] = [leader] + v.append(token) + return v, value + +def get_parameter(value): + """ attribute [section] ["*"] [CFWS] "=" value + + The CFWS is implied by the RFC but not made explicit in the BNF. This + simplified form of the BNF from the RFC is made to conform with the RFC BNF + through some extra checks. We do it this way because it makes both error + recovery and working with the resulting parse tree easier. + """ + # It is possible CFWS would also be implicitly allowed between the section + # and the 'extended-attribute' marker (the '*') , but we've never seen that + # in the wild and we will therefore ignore the possibility. 
+ param = Parameter() + token, value = get_attribute(value) + param.append(token) + if not value or value[0] == ';': + param.defects.append(errors.InvalidHeaderDefect("Parameter contains " + "name ({}) but no value".format(token))) + return param, value + if value[0] == '*': + try: + token, value = get_section(value) + param.sectioned = True + param.append(token) + except errors.HeaderParseError: + pass + if not value: + raise errors.HeaderParseError("Incomplete parameter") + if value[0] == '*': + param.append(ValueTerminal('*', 'extended-parameter-marker')) + value = value[1:] + param.extended = True + if value[0] != '=': + raise errors.HeaderParseError("Parameter not followed by '='") + param.append(ValueTerminal('=', 'parameter-separator')) + value = value[1:] + leader = None + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + param.append(token) + remainder = None + appendto = param + if param.extended and value and value[0] == '"': + # Now for some serious hackery to handle the common invalid case of + # double quotes around an extended value. We also accept (with defect) + # a value marked as encoded that isn't really. 
+ qstring, remainder = get_quoted_string(value) + inner_value = qstring.stripped_value + semi_valid = False + if param.section_number == 0: + if inner_value and inner_value[0] == "'": + semi_valid = True + else: + token, rest = get_attrtext(inner_value) + if rest and rest[0] == "'": + semi_valid = True + else: + try: + token, rest = get_extended_attrtext(inner_value) + except: + pass + else: + if not rest: + semi_valid = True + if semi_valid: + param.defects.append(errors.InvalidHeaderDefect( + "Quoted string value for extended parameter is invalid")) + param.append(qstring) + for t in qstring: + if t.token_type == 'bare-quoted-string': + t[:] = [] + appendto = t + break + value = inner_value + else: + remainder = None + param.defects.append(errors.InvalidHeaderDefect( + "Parameter marked as extended but appears to have a " + "quoted string value that is non-encoded")) + if value and value[0] == "'": + token = None + else: + token, value = get_value(value) + if not param.extended or param.section_number > 0: + if not value or value[0] != "'": + appendto.append(token) + if remainder is not None: + assert not value, value + value = remainder + return param, value + param.defects.append(errors.InvalidHeaderDefect( + "Apparent initial-extended-value but attribute " + "was not marked as extended or was not initial section")) + if not value: + # Assume the charset/lang is missing and the token is the value. 
+ param.defects.append(errors.InvalidHeaderDefect( + "Missing required charset/lang delimiters")) + appendto.append(token) + if remainder is None: + return param, value + else: + if token is not None: + for t in token: + if t.token_type == 'extended-attrtext': + break + t.token_type == 'attrtext' + appendto.append(t) + param.charset = t.value + if value[0] != "'": + raise errors.HeaderParseError("Expected RFC2231 char/lang encoding " + "delimiter, but found {!r}".format(value)) + appendto.append(ValueTerminal("'", 'RFC2231 delimiter')) + value = value[1:] + if value and value[0] != "'": + token, value = get_attrtext(value) + appendto.append(token) + param.lang = token.value + if not value or value[0] != "'": + raise errors.HeaderParseError("Expected RFC2231 char/lang encoding " + "delimiter, but found {}".format(value)) + appendto.append(ValueTerminal("'", 'RFC2231 delimiter')) + value = value[1:] + if remainder is not None: + # Treat the rest of value as bare quoted string content. + v = Value() + while value: + if value[0] in WSP: + token, value = get_fws(value) + else: + token, value = get_qcontent(value) + v.append(token) + token = v + else: + token, value = get_value(value) + appendto.append(token) + if remainder is not None: + assert not value, value + value = remainder + return param, value + +def parse_mime_parameters(value): + """ parameter *( ";" parameter ) + + That BNF is meant to indicate this routine should only be called after + finding and handling the leading ';'. There is no corresponding rule in + the formal RFC grammar, but it is more convenient for us for the set of + parameters to be treated as its own TokenList. + + This is 'parse' routine because it consumes the reminaing value, but it + would never be called to parse a full header. Instead it is called to + parse everything after the non-parameter value of a specific MIME header. 
+ + """ + mime_parameters = MimeParameters() + while value: + try: + token, value = get_parameter(value) + mime_parameters.append(token) + except errors.HeaderParseError as err: + leader = None + if value[0] in CFWS_LEADER: + leader, value = get_cfws(value) + if not value: + mime_parameters.append(leader) + return mime_parameters + if value[0] == ';': + if leader is not None: + mime_parameters.append(leader) + mime_parameters.defects.append(errors.InvalidHeaderDefect( + "parameter entry with no content")) + else: + token, value = get_invalid_parameter(value) + if leader: + token[:0] = [leader] + mime_parameters.append(token) + mime_parameters.defects.append(errors.InvalidHeaderDefect( + "invalid parameter {!r}".format(token))) + if value and value[0] != ';': + # Junk after the otherwise valid parameter. Mark it as + # invalid, but it will have a value. + param = mime_parameters[-1] + param.token_type = 'invalid-parameter' + token, value = get_invalid_parameter(value) + param.extend(token) + mime_parameters.defects.append(errors.InvalidHeaderDefect( + "parameter with invalid trailing text {!r}".format(token))) + if value: + # Must be a ';' at this point. + mime_parameters.append(ValueTerminal(';', 'parameter-separator')) + value = value[1:] + return mime_parameters + +def _find_mime_parameters(tokenlist, value): + """Do our best to find the parameters in an invalid MIME header + + """ + while value and value[0] != ';': + if value[0] in PHRASE_ENDS: + tokenlist.append(ValueTerminal(value[0], 'misplaced-special')) + value = value[1:] + else: + token, value = get_phrase(value) + tokenlist.append(token) + if not value: + return + tokenlist.append(ValueTerminal(';', 'parameter-separator')) + tokenlist.append(parse_mime_parameters(value[1:])) + +def parse_content_type_header(value): + """ maintype "/" subtype *( ";" parameter ) + + The maintype and substype are tokens. Theoretically they could + be checked against the official IANA list + x-token, but we + don't do that. 
+ """ + ctype = ContentType() + recover = False + if not value: + ctype.defects.append(errors.HeaderMissingRequiredValue( + "Missing content type specification")) + return ctype + try: + token, value = get_token(value) + except errors.HeaderParseError: + ctype.defects.append(errors.InvalidHeaderDefect( + "Expected content maintype but found {!r}".format(value))) + _find_mime_parameters(ctype, value) + return ctype + ctype.append(token) + # XXX: If we really want to follow the formal grammar we should make + # mantype and subtype specialized TokenLists here. Probably not worth it. + if not value or value[0] != '/': + ctype.defects.append(errors.InvalidHeaderDefect( + "Invalid content type")) + if value: + _find_mime_parameters(ctype, value) + return ctype + ctype.maintype = token.value.strip().lower() + ctype.append(ValueTerminal('/', 'content-type-separator')) + value = value[1:] + try: + token, value = get_token(value) + except errors.HeaderParseError: + ctype.defects.append(errors.InvalidHeaderDefect( + "Expected content subtype but found {!r}".format(value))) + _find_mime_parameters(ctype, value) + return ctype + ctype.append(token) + ctype.subtype = token.value.strip().lower() + if not value: + return ctype + if value[0] != ';': + ctype.defects.append(errors.InvalidHeaderDefect( + "Only parameters are valid after content type, but " + "found {!r}".format(value))) + # The RFC requires that a syntactically invalid content-type be treated + # as text/plain. Perhaps we should postel this, but we should probably + # only do that if we were checking the subtype value against IANA. 
+        del ctype.maintype, ctype.subtype
+        _find_mime_parameters(ctype, value)
+        return ctype
+    ctype.append(ValueTerminal(';', 'parameter-separator'))
+    ctype.append(parse_mime_parameters(value[1:]))
+    return ctype
+
+def parse_content_disposition_header(value):
+    """ disposition-type *( ";" parameter )
+
+    """
+    disp_header = ContentDisposition()
+    if not value:
+        disp_header.defects.append(errors.HeaderMissingRequiredValue(
+            "Missing content disposition"))
+        return disp_header
+    try:
+        token, value = get_token(value)
+    except errors.HeaderParseError:
+        disp_header.defects.append(errors.InvalidHeaderDefect(
+            "Expected content disposition but found {!r}".format(value)))
+        _find_mime_parameters(disp_header, value)
+        return disp_header
+    disp_header.append(token)
+    disp_header.content_disposition = token.value.strip().lower()
+    if not value:
+        return disp_header
+    if value[0] != ';':
+        disp_header.defects.append(errors.InvalidHeaderDefect(
+            "Only parameters are valid after content disposition, but "
+            "found {!r}".format(value)))
+        _find_mime_parameters(disp_header, value)
+        return disp_header
+    disp_header.append(ValueTerminal(';', 'parameter-separator'))
+    disp_header.append(parse_mime_parameters(value[1:]))
+    return disp_header
+
+def parse_content_transfer_encoding_header(value):
+    """ mechanism
+
+    """
+    # We should probably validate the values, since the list is fixed.
+    cte_header = ContentTransferEncoding()
+    if not value:
+        cte_header.defects.append(errors.HeaderMissingRequiredValue(
+            "Missing content transfer encoding"))
+        return cte_header
+    try:
+        token, value = get_token(value)
+    except errors.HeaderParseError:
+        cte_header.defects.append(errors.InvalidHeaderDefect(
+            "Expected content transfer encoding but found {!r}".format(value)))
+    else:
+        cte_header.append(token)
+        cte_header.cte = token.value.strip().lower()
+    if not value:
+        return cte_header
+    while value:
+        cte_header.defects.append(errors.InvalidHeaderDefect(
+            "Extra text after content transfer encoding"))
+        if value[0] in PHRASE_ENDS:
+            cte_header.append(ValueTerminal(value[0], 'misplaced-special'))
+            value = value[1:]
+        else:
+            token, value = get_phrase(value)
+            cte_header.append(token)
+    return cte_header
diff --git a/src/future/backports/email/_parseaddr.py b/src/future/backports/email/_parseaddr.py
new file mode 100644
index 00000000..5b50cc6b
--- /dev/null
+++ b/src/future/backports/email/_parseaddr.py
@@ -0,0 +1,546 @@
+# Copyright (C) 2002-2007 Python Software Foundation
+# Contact: email-sig@python.org
+
+"""Email address parsing code.
+
+Lifted directly from rfc822.py.  This should eventually be rewritten.
+""" + +from __future__ import unicode_literals +from __future__ import print_function +from __future__ import division +from __future__ import absolute_import +from future.builtins import int + +__all__ = [ + 'mktime_tz', + 'parsedate', + 'parsedate_tz', + 'quote', + ] + +import time, calendar + +SPACE = ' ' +EMPTYSTRING = '' +COMMASPACE = ', ' + +# Parse a date field +_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', + 'aug', 'sep', 'oct', 'nov', 'dec', + 'january', 'february', 'march', 'april', 'may', 'june', 'july', + 'august', 'september', 'october', 'november', 'december'] + +_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] + +# The timezone table does not include the military time zones defined +# in RFC822, other than Z. According to RFC1123, the description in +# RFC822 gets the signs wrong, so we can't rely on any such time +# zones. RFC1123 recommends that numeric timezone indicators be used +# instead of timezone names. + +_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0, + 'AST': -400, 'ADT': -300, # Atlantic (used in Canada) + 'EST': -500, 'EDT': -400, # Eastern + 'CST': -600, 'CDT': -500, # Central + 'MST': -700, 'MDT': -600, # Mountain + 'PST': -800, 'PDT': -700 # Pacific + } + + +def parsedate_tz(data): + """Convert a date string to a time tuple. + + Accounts for military timezones. + """ + res = _parsedate_tz(data) + if not res: + return + if res[9] is None: + res[9] = 0 + return tuple(res) + +def _parsedate_tz(data): + """Convert date to extended time tuple. + + The last (additional) element is the time zone offset in seconds, except if + the timezone was specified as -0000. In that case the last element is + None. This indicates a UTC timestamp that explicitly declaims knowledge of + the source timezone, as opposed to a +0000 timestamp that indicates the + source timezone really was UTC. 
+ + """ + if not data: + return + data = data.split() + # The FWS after the comma after the day-of-week is optional, so search and + # adjust for this. + if data[0].endswith(',') or data[0].lower() in _daynames: + # There's a dayname here. Skip it + del data[0] + else: + i = data[0].rfind(',') + if i >= 0: + data[0] = data[0][i+1:] + if len(data) == 3: # RFC 850 date, deprecated + stuff = data[0].split('-') + if len(stuff) == 3: + data = stuff + data[1:] + if len(data) == 4: + s = data[3] + i = s.find('+') + if i == -1: + i = s.find('-') + if i > 0: + data[3:] = [s[:i], s[i:]] + else: + data.append('') # Dummy tz + if len(data) < 5: + return None + data = data[:5] + [dd, mm, yy, tm, tz] = data + mm = mm.lower() + if mm not in _monthnames: + dd, mm = mm, dd.lower() + if mm not in _monthnames: + return None + mm = _monthnames.index(mm) + 1 + if mm > 12: + mm -= 12 + if dd[-1] == ',': + dd = dd[:-1] + i = yy.find(':') + if i > 0: + yy, tm = tm, yy + if yy[-1] == ',': + yy = yy[:-1] + if not yy[0].isdigit(): + yy, tz = tz, yy + if tm[-1] == ',': + tm = tm[:-1] + tm = tm.split(':') + if len(tm) == 2: + [thh, tmm] = tm + tss = '0' + elif len(tm) == 3: + [thh, tmm, tss] = tm + elif len(tm) == 1 and '.' in tm[0]: + # Some non-compliant MUAs use '.' to separate time elements. + tm = tm[0].split('.') + if len(tm) == 2: + [thh, tmm] = tm + tss = 0 + elif len(tm) == 3: + [thh, tmm, tss] = tm + else: + return None + try: + yy = int(yy) + dd = int(dd) + thh = int(thh) + tmm = int(tmm) + tss = int(tss) + except ValueError: + return None + # Check for a yy specified in two-digit format, then convert it to the + # appropriate four-digit format, according to the POSIX standard. RFC 822 + # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) + # mandates a 4-digit yy. For more information, see the documentation for + # the time module. + if yy < 100: + # The year is between 1969 and 1999 (inclusive). 
+ if yy > 68: + yy += 1900 + # The year is between 2000 and 2068 (inclusive). + else: + yy += 2000 + tzoffset = None + tz = tz.upper() + if tz in _timezones: + tzoffset = _timezones[tz] + else: + try: + tzoffset = int(tz) + except ValueError: + pass + if tzoffset==0 and tz.startswith('-'): + tzoffset = None + # Convert a timezone offset into seconds ; -0500 -> -18000 + if tzoffset: + if tzoffset < 0: + tzsign = -1 + tzoffset = -tzoffset + else: + tzsign = 1 + tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60) + # Daylight Saving Time flag is set to -1, since DST is unknown. + return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset] + + +def parsedate(data): + """Convert a time string to a time tuple.""" + t = parsedate_tz(data) + if isinstance(t, tuple): + return t[:9] + else: + return t + + +def mktime_tz(data): + """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp.""" + if data[9] is None: + # No zone info, so localtime is better assumption than GMT + return time.mktime(data[:8] + (-1,)) + else: + t = calendar.timegm(data) + return t - data[9] + + +def quote(str): + """Prepare string to be used in a quoted string. + + Turns backslash and double quote characters into quoted pairs. These + are the only characters that need to be quoted inside a quoted string. + Does not add the surrounding double quotes. + """ + return str.replace('\\', '\\\\').replace('"', '\\"') + + +class AddrlistClass(object): + """Address parser class by Ben Escoto. + + To understand what this class does, it helps to have a copy of RFC 2822 in + front of you. + + Note: this class interface is deprecated and may be removed in the future. + Use email.utils.AddressList instead. + """ + + def __init__(self, field): + """Initialize a new instance. + + `field' is an unparsed address header field, containing + one or more addresses. 
+ """ + self.specials = '()<>@,:;.\"[]' + self.pos = 0 + self.LWS = ' \t' + self.CR = '\r\n' + self.FWS = self.LWS + self.CR + self.atomends = self.specials + self.LWS + self.CR + # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it + # is obsolete syntax. RFC 2822 requires that we recognize obsolete + # syntax, so allow dots in phrases. + self.phraseends = self.atomends.replace('.', '') + self.field = field + self.commentlist = [] + + def gotonext(self): + """Skip white space and extract comments.""" + wslist = [] + while self.pos < len(self.field): + if self.field[self.pos] in self.LWS + '\n\r': + if self.field[self.pos] not in '\n\r': + wslist.append(self.field[self.pos]) + self.pos += 1 + elif self.field[self.pos] == '(': + self.commentlist.append(self.getcomment()) + else: + break + return EMPTYSTRING.join(wslist) + + def getaddrlist(self): + """Parse all addresses. + + Returns a list containing all of the addresses. + """ + result = [] + while self.pos < len(self.field): + ad = self.getaddress() + if ad: + result += ad + else: + result.append(('', '')) + return result + + def getaddress(self): + """Parse the next address.""" + self.commentlist = [] + self.gotonext() + + oldpos = self.pos + oldcl = self.commentlist + plist = self.getphraselist() + + self.gotonext() + returnlist = [] + + if self.pos >= len(self.field): + # Bad email address technically, no domain. 
+ if plist: + returnlist = [(SPACE.join(self.commentlist), plist[0])] + + elif self.field[self.pos] in '.@': + # email address is just an addrspec + # this isn't very efficient since we start over + self.pos = oldpos + self.commentlist = oldcl + addrspec = self.getaddrspec() + returnlist = [(SPACE.join(self.commentlist), addrspec)] + + elif self.field[self.pos] == ':': + # address is a group + returnlist = [] + + fieldlen = len(self.field) + self.pos += 1 + while self.pos < len(self.field): + self.gotonext() + if self.pos < fieldlen and self.field[self.pos] == ';': + self.pos += 1 + break + returnlist = returnlist + self.getaddress() + + elif self.field[self.pos] == '<': + # Address is a phrase then a route addr + routeaddr = self.getrouteaddr() + + if self.commentlist: + returnlist = [(SPACE.join(plist) + ' (' + + ' '.join(self.commentlist) + ')', routeaddr)] + else: + returnlist = [(SPACE.join(plist), routeaddr)] + + else: + if plist: + returnlist = [(SPACE.join(self.commentlist), plist[0])] + elif self.field[self.pos] in self.specials: + self.pos += 1 + + self.gotonext() + if self.pos < len(self.field) and self.field[self.pos] == ',': + self.pos += 1 + return returnlist + + def getrouteaddr(self): + """Parse a route address (Return-path value). + + This method just skips all the route stuff and returns the addrspec. 
+ """ + if self.field[self.pos] != '<': + return + + expectroute = False + self.pos += 1 + self.gotonext() + adlist = '' + while self.pos < len(self.field): + if expectroute: + self.getdomain() + expectroute = False + elif self.field[self.pos] == '>': + self.pos += 1 + break + elif self.field[self.pos] == '@': + self.pos += 1 + expectroute = True + elif self.field[self.pos] == ':': + self.pos += 1 + else: + adlist = self.getaddrspec() + self.pos += 1 + break + self.gotonext() + + return adlist + + def getaddrspec(self): + """Parse an RFC 2822 addr-spec.""" + aslist = [] + + self.gotonext() + while self.pos < len(self.field): + preserve_ws = True + if self.field[self.pos] == '.': + if aslist and not aslist[-1].strip(): + aslist.pop() + aslist.append('.') + self.pos += 1 + preserve_ws = False + elif self.field[self.pos] == '"': + aslist.append('"%s"' % quote(self.getquote())) + elif self.field[self.pos] in self.atomends: + if aslist and not aslist[-1].strip(): + aslist.pop() + break + else: + aslist.append(self.getatom()) + ws = self.gotonext() + if preserve_ws and ws: + aslist.append(ws) + + if self.pos >= len(self.field) or self.field[self.pos] != '@': + return EMPTYSTRING.join(aslist) + + aslist.append('@') + self.pos += 1 + self.gotonext() + return EMPTYSTRING.join(aslist) + self.getdomain() + + def getdomain(self): + """Get the complete domain name from an address.""" + sdlist = [] + while self.pos < len(self.field): + if self.field[self.pos] in self.LWS: + self.pos += 1 + elif self.field[self.pos] == '(': + self.commentlist.append(self.getcomment()) + elif self.field[self.pos] == '[': + sdlist.append(self.getdomainliteral()) + elif self.field[self.pos] == '.': + self.pos += 1 + sdlist.append('.') + elif self.field[self.pos] in self.atomends: + break + else: + sdlist.append(self.getatom()) + return EMPTYSTRING.join(sdlist) + + def getdelimited(self, beginchar, endchars, allowcomments=True): + """Parse a header fragment delimited by special characters. 
+ + `beginchar' is the start character for the fragment. + If self is not looking at an instance of `beginchar' then + getdelimited returns the empty string. + + `endchars' is a sequence of allowable end-delimiting characters. + Parsing stops when one of these is encountered. + + If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed + within the parsed fragment. + """ + if self.field[self.pos] != beginchar: + return '' + + slist = [''] + quote = False + self.pos += 1 + while self.pos < len(self.field): + if quote: + slist.append(self.field[self.pos]) + quote = False + elif self.field[self.pos] in endchars: + self.pos += 1 + break + elif allowcomments and self.field[self.pos] == '(': + slist.append(self.getcomment()) + continue # have already advanced pos from getcomment + elif self.field[self.pos] == '\\': + quote = True + else: + slist.append(self.field[self.pos]) + self.pos += 1 + + return EMPTYSTRING.join(slist) + + def getquote(self): + """Get a quote-delimited fragment from self's field.""" + return self.getdelimited('"', '"\r', False) + + def getcomment(self): + """Get a parenthesis-delimited fragment from self's field.""" + return self.getdelimited('(', ')\r', True) + + def getdomainliteral(self): + """Parse an RFC 2822 domain-literal.""" + return '[%s]' % self.getdelimited('[', ']\r', False) + + def getatom(self, atomends=None): + """Parse an RFC 2822 atom. + + Optional atomends specifies a different set of end token delimiters + (the default is to use self.atomends). This is used e.g. in + getphraselist() since phrase endings must not include the `.' (which + is legal in phrases).""" + atomlist = [''] + if atomends is None: + atomends = self.atomends + + while self.pos < len(self.field): + if self.field[self.pos] in atomends: + break + else: + atomlist.append(self.field[self.pos]) + self.pos += 1 + + return EMPTYSTRING.join(atomlist) + + def getphraselist(self): + """Parse a sequence of RFC 2822 phrases. 
+ + A phrase is a sequence of words, which are in turn either RFC 2822 + atoms or quoted-strings. Phrases are canonicalized by squeezing all + runs of continuous whitespace into one space. + """ + plist = [] + + while self.pos < len(self.field): + if self.field[self.pos] in self.FWS: + self.pos += 1 + elif self.field[self.pos] == '"': + plist.append(self.getquote()) + elif self.field[self.pos] == '(': + self.commentlist.append(self.getcomment()) + elif self.field[self.pos] in self.phraseends: + break + else: + plist.append(self.getatom(self.phraseends)) + + return plist + +class AddressList(AddrlistClass): + """An AddressList encapsulates a list of parsed RFC 2822 addresses.""" + def __init__(self, field): + AddrlistClass.__init__(self, field) + if field: + self.addresslist = self.getaddrlist() + else: + self.addresslist = [] + + def __len__(self): + return len(self.addresslist) + + def __add__(self, other): + # Set union + newaddr = AddressList(None) + newaddr.addresslist = self.addresslist[:] + for x in other.addresslist: + if not x in self.addresslist: + newaddr.addresslist.append(x) + return newaddr + + def __iadd__(self, other): + # Set union, in-place + for x in other.addresslist: + if not x in self.addresslist: + self.addresslist.append(x) + return self + + def __sub__(self, other): + # Set difference + newaddr = AddressList(None) + for x in self.addresslist: + if not x in other.addresslist: + newaddr.addresslist.append(x) + return newaddr + + def __isub__(self, other): + # Set difference, in-place + for x in other.addresslist: + if x in self.addresslist: + self.addresslist.remove(x) + return self + + def __getitem__(self, index): + # Make indexing, slices, and 'in' work + return self.addresslist[index] diff --git a/src/future/backports/email/_policybase.py b/src/future/backports/email/_policybase.py new file mode 100644 index 00000000..c66aea90 --- /dev/null +++ b/src/future/backports/email/_policybase.py @@ -0,0 +1,365 @@ +"""Policy framework for the email 
package. + +Allows fine grained feature control of how the package parses and emits data. +""" +from __future__ import unicode_literals +from __future__ import print_function +from __future__ import division +from __future__ import absolute_import +from future.builtins import super +from future.builtins import str +from future.utils import with_metaclass + +import abc +from future.backports.email import header +from future.backports.email import charset as _charset +from future.backports.email.utils import _has_surrogates + +__all__ = [ + 'Policy', + 'Compat32', + 'compat32', + ] + + +class _PolicyBase(object): + + """Policy Object basic framework. + + This class is useless unless subclassed. A subclass should define + class attributes with defaults for any values that are to be + managed by the Policy object. The constructor will then allow + non-default values to be set for these attributes at instance + creation time. The instance will be callable, taking these same + attributes keyword arguments, and returning a new instance + identical to the called instance except for those values changed + by the keyword arguments. Instances may be added, yielding new + instances with any non-default values from the right hand + operand overriding those in the left hand operand. That is, + + A + B == A() + + The repr of an instance can be used to reconstruct the object + if and only if the repr of the values can be used to reconstruct + those values. + + """ + + def __init__(self, **kw): + """Create new Policy, possibly overriding some defaults. + + See class docstring for a list of overridable attributes. 
+ + """ + for name, value in kw.items(): + if hasattr(self, name): + super(_PolicyBase,self).__setattr__(name, value) + else: + raise TypeError( + "{!r} is an invalid keyword argument for {}".format( + name, self.__class__.__name__)) + + def __repr__(self): + args = [ "{}={!r}".format(name, value) + for name, value in self.__dict__.items() ] + return "{}({})".format(self.__class__.__name__, ', '.join(args)) + + def clone(self, **kw): + """Return a new instance with specified attributes changed. + + The new instance has the same attribute values as the current object, + except for the changes passed in as keyword arguments. + + """ + newpolicy = self.__class__.__new__(self.__class__) + for attr, value in self.__dict__.items(): + object.__setattr__(newpolicy, attr, value) + for attr, value in kw.items(): + if not hasattr(self, attr): + raise TypeError( + "{!r} is an invalid keyword argument for {}".format( + attr, self.__class__.__name__)) + object.__setattr__(newpolicy, attr, value) + return newpolicy + + def __setattr__(self, name, value): + if hasattr(self, name): + msg = "{!r} object attribute {!r} is read-only" + else: + msg = "{!r} object has no attribute {!r}" + raise AttributeError(msg.format(self.__class__.__name__, name)) + + def __add__(self, other): + """Non-default values from right operand override those from left. + + The object returned is a new instance of the subclass. 
+ + """ + return self.clone(**other.__dict__) + + +def _append_doc(doc, added_doc): + doc = doc.rsplit('\n', 1)[0] + added_doc = added_doc.split('\n', 1)[1] + return doc + '\n' + added_doc + +def _extend_docstrings(cls): + if cls.__doc__ and cls.__doc__.startswith('+'): + cls.__doc__ = _append_doc(cls.__bases__[0].__doc__, cls.__doc__) + for name, attr in cls.__dict__.items(): + if attr.__doc__ and attr.__doc__.startswith('+'): + for c in (c for base in cls.__bases__ for c in base.mro()): + doc = getattr(getattr(c, name), '__doc__') + if doc: + attr.__doc__ = _append_doc(doc, attr.__doc__) + break + return cls + + +class Policy(with_metaclass(abc.ABCMeta, _PolicyBase)): + + r"""Controls for how messages are interpreted and formatted. + + Most of the classes and many of the methods in the email package accept + Policy objects as parameters. A Policy object contains a set of values and + functions that control how input is interpreted and how output is rendered. + For example, the parameter 'raise_on_defect' controls whether or not an RFC + violation results in an error being raised or not, while 'max_line_length' + controls the maximum length of output lines when a Message is serialized. + + Any valid attribute may be overridden when a Policy is created by passing + it as a keyword argument to the constructor. Policy objects are immutable, + but a new Policy object can be created with only certain values changed by + calling the Policy instance with keyword arguments. Policy objects can + also be added, producing a new Policy object in which the non-default + attributes set in the right hand operand overwrite those specified in the + left operand. + + Settable attributes: + + raise_on_defect -- If true, then defects should be raised as errors. + Default: False. + + linesep -- string containing the value to use as separation + between output lines. Default '\n'. 
+ + cte_type -- Type of allowed content transfer encodings + + 7bit -- ASCII only + 8bit -- Content-Transfer-Encoding: 8bit is allowed + + Default: 8bit. Also controls the disposition of + (RFC invalid) binary data in headers; see the + documentation of the binary_fold method. + + max_line_length -- maximum length of lines, excluding 'linesep', + during serialization. None or 0 means no line + wrapping is done. Default is 78. + + """ + + raise_on_defect = False + linesep = '\n' + cte_type = '8bit' + max_line_length = 78 + + def handle_defect(self, obj, defect): + """Based on policy, either raise defect or call register_defect. + + handle_defect(obj, defect) + + defect should be a Defect subclass, but in any case must be an + Exception subclass. obj is the object on which the defect should be + registered if it is not raised. If the raise_on_defect is True, the + defect is raised as an error, otherwise the object and the defect are + passed to register_defect. + + This method is intended to be called by parsers that discover defects. + The email package parsers always call it with Defect instances. + + """ + if self.raise_on_defect: + raise defect + self.register_defect(obj, defect) + + def register_defect(self, obj, defect): + """Record 'defect' on 'obj'. + + Called by handle_defect if raise_on_defect is False. This method is + part of the Policy API so that Policy subclasses can implement custom + defect handling. The default implementation calls the append method of + the defects attribute of obj. The objects used by the email package by + default that get passed to this method will always have a defects + attribute with an append method. + + """ + obj.defects.append(defect) + + def header_max_count(self, name): + """Return the maximum allowed number of headers named 'name'. + + Called when a header is added to a Message object. 
If the returned + value is not 0 or None, and there are already a number of headers with + the name 'name' equal to the value returned, a ValueError is raised. + + Because the default behavior of Message's __setitem__ is to append the + value to the list of headers, it is easy to create duplicate headers + without realizing it. This method allows certain headers to be limited + in the number of instances of that header that may be added to a + Message programmatically. (The limit is not observed by the parser, + which will faithfully produce as many headers as exist in the message + being parsed.) + + The default implementation returns None for all header names. + """ + return None + + @abc.abstractmethod + def header_source_parse(self, sourcelines): + """Given a list of linesep terminated strings constituting the lines of + a single header, return the (name, value) tuple that should be stored + in the model. The input lines should retain their terminating linesep + characters. The lines passed in by the email package may contain + surrogateescaped binary data. + """ + raise NotImplementedError + + @abc.abstractmethod + def header_store_parse(self, name, value): + """Given the header name and the value provided by the application + program, return the (name, value) that should be stored in the model. + """ + raise NotImplementedError + + @abc.abstractmethod + def header_fetch_parse(self, name, value): + """Given the header name and the value from the model, return the value + to be returned to the application program that is requesting that + header. The value passed in by the email package may contain + surrogateescaped binary data if the lines were parsed by a BytesParser. + The returned value should not contain any surrogateescaped data. 
+ + """ + raise NotImplementedError + + @abc.abstractmethod + def fold(self, name, value): + """Given the header name and the value from the model, return a string + containing linesep characters that implement the folding of the header + according to the policy controls. The value passed in by the email + package may contain surrogateescaped binary data if the lines were + parsed by a BytesParser. The returned value should not contain any + surrogateescaped data. + + """ + raise NotImplementedError + + @abc.abstractmethod + def fold_binary(self, name, value): + """Given the header name and the value from the model, return binary + data containing linesep characters that implement the folding of the + header according to the policy controls. The value passed in by the + email package may contain surrogateescaped binary data. + + """ + raise NotImplementedError + + +@_extend_docstrings +class Compat32(Policy): + + """+ + This particular policy is the backward compatibility Policy. It + replicates the behavior of the email package version 5.1. + """ + + def _sanitize_header(self, name, value): + # If the header value contains surrogates, return a Header using + # the unknown-8bit charset to encode the bytes as encoded words. + if not isinstance(value, str): + # Assume it is already a header object + return value + if _has_surrogates(value): + return header.Header(value, charset=_charset.UNKNOWN8BIT, + header_name=name) + else: + return value + + def header_source_parse(self, sourcelines): + """+ + The name is parsed as everything up to the ':' and returned unmodified. + The value is determined by stripping leading whitespace off the + remainder of the first line, joining all subsequent lines together, and + stripping any trailing carriage return or linefeed characters. 
+ + """ + name, value = sourcelines[0].split(':', 1) + value = value.lstrip(' \t') + ''.join(sourcelines[1:]) + return (name, value.rstrip('\r\n')) + + def header_store_parse(self, name, value): + """+ + The name and value are returned unmodified. + """ + return (name, value) + + def header_fetch_parse(self, name, value): + """+ + If the value contains binary data, it is converted into a Header object + using the unknown-8bit charset. Otherwise it is returned unmodified. + """ + return self._sanitize_header(name, value) + + def fold(self, name, value): + """+ + Headers are folded using the Header folding algorithm, which preserves + existing line breaks in the value, and wraps each resulting line to the + max_line_length. Non-ASCII binary data are CTE encoded using the + unknown-8bit charset. + + """ + return self._fold(name, value, sanitize=True) + + def fold_binary(self, name, value): + """+ + Headers are folded using the Header folding algorithm, which preserves + existing line breaks in the value, and wraps each resulting line to the + max_line_length. If cte_type is 7bit, non-ascii binary data is CTE + encoded using the unknown-8bit charset. Otherwise the original source + header is used, with its existing line breaks and/or binary data. + + """ + folded = self._fold(name, value, sanitize=self.cte_type=='7bit') + return folded.encode('ascii', 'surrogateescape') + + def _fold(self, name, value, sanitize): + parts = [] + parts.append('%s: ' % name) + if isinstance(value, str): + if _has_surrogates(value): + if sanitize: + h = header.Header(value, + charset=_charset.UNKNOWN8BIT, + header_name=name) + else: + # If we have raw 8bit data in a byte string, we have no idea + # what the encoding is. There is no safe way to split this + # string. If it's ascii-subset, then we could do a normal + # ascii split, but if it's multibyte then we could break the + # string. 
There's no way to know so the least harm seems to + # be to not split the string and risk it being too long. + parts.append(value) + h = None + else: + h = header.Header(value, header_name=name) + else: + # Assume it is a Header-like object. + h = value + if h is not None: + parts.append(h.encode(linesep=self.linesep, + maxlinelen=self.max_line_length)) + parts.append(self.linesep) + return ''.join(parts) + + +compat32 = Compat32() diff --git a/src/future/backports/email/base64mime.py b/src/future/backports/email/base64mime.py new file mode 100644 index 00000000..296392a6 --- /dev/null +++ b/src/future/backports/email/base64mime.py @@ -0,0 +1,121 @@ +# Copyright (C) 2002-2007 Python Software Foundation +# Author: Ben Gertzfield +# Contact: email-sig@python.org + +"""Base64 content transfer encoding per RFCs 2045-2047. + +This module handles the content transfer encoding method defined in RFC 2045 +to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit +characters encoding known as Base64. + +It is used in the MIME standards for email to attach images, audio, and text +using some 8-bit character sets to messages. + +This module provides an interface to encode and decode both headers and bodies +with Base64 encoding. + +RFC 2045 defines a method for including character set information in an +`encoded-word' in a header. This method is commonly used for 8-bit real names +in To:, From:, Cc:, etc. fields, as well as Subject: lines. + +This module does not do the line wrapping or end-of-line character conversion +necessary for proper internationalized headers; it only does dumb encoding and +decoding. To deal with the various line wrapping issues, use the email.header +module. 
+""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from future.builtins import range +from future.builtins import bytes +from future.builtins import str + +__all__ = [ + 'body_decode', + 'body_encode', + 'decode', + 'decodestring', + 'header_encode', + 'header_length', + ] + + +from base64 import b64encode +from binascii import b2a_base64, a2b_base64 + +CRLF = '\r\n' +NL = '\n' +EMPTYSTRING = '' + +# See also Charset.py +MISC_LEN = 7 + + +# Helpers +def header_length(bytearray): + """Return the length of s when it is encoded with base64.""" + groups_of_3, leftover = divmod(len(bytearray), 3) + # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in. + n = groups_of_3 * 4 + if leftover: + n += 4 + return n + + +def header_encode(header_bytes, charset='iso-8859-1'): + """Encode a single header line with Base64 encoding in a given charset. + + charset names the character set to use to encode the header. It defaults + to iso-8859-1. Base64 encoding is defined in RFC 2045. + """ + if not header_bytes: + return "" + if isinstance(header_bytes, str): + header_bytes = header_bytes.encode(charset) + encoded = b64encode(header_bytes).decode("ascii") + return '=?%s?b?%s?=' % (charset, encoded) + + +def body_encode(s, maxlinelen=76, eol=NL): + r"""Encode a string with base64. + + Each line will be wrapped at, at most, maxlinelen characters (defaults to + 76 characters). + + Each line of encoded text will end with eol, which defaults to "\n". Set + this to "\r\n" if you will be using the result of this function directly + in an email. + """ + if not s: + return s + + encvec = [] + max_unencoded = maxlinelen * 3 // 4 + for i in range(0, len(s), max_unencoded): + # BAW: should encode() inherit b2a_base64()'s dubious behavior in + # adding a newline to the encoded string? 
+ enc = b2a_base64(s[i:i + max_unencoded]).decode("ascii") + if enc.endswith(NL) and eol != NL: + enc = enc[:-1] + eol + encvec.append(enc) + return EMPTYSTRING.join(encvec) + + +def decode(string): + """Decode a raw base64 string, returning a bytes object. + + This function does not parse a full MIME header value encoded with + base64 (like =?iso-8895-1?b?bmloISBuaWgh?=) -- please use the high + level email.header class for that functionality. + """ + if not string: + return bytes() + elif isinstance(string, str): + return a2b_base64(string.encode('raw-unicode-escape')) + else: + return a2b_base64(string) + + +# For convenience and backwards compatibility w/ standard base64 module +body_decode = decode +decodestring = decode diff --git a/src/future/backports/email/charset.py b/src/future/backports/email/charset.py new file mode 100644 index 00000000..2385ce68 --- /dev/null +++ b/src/future/backports/email/charset.py @@ -0,0 +1,409 @@ +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from future.builtins import str +from future.builtins import next + +# Copyright (C) 2001-2007 Python Software Foundation +# Author: Ben Gertzfield, Barry Warsaw +# Contact: email-sig@python.org + +__all__ = [ + 'Charset', + 'add_alias', + 'add_charset', + 'add_codec', + ] + +from functools import partial + +from future.backports import email +from future.backports.email import errors +from future.backports.email.encoders import encode_7or8bit + + +# Flags for types of header encodings +QP = 1 # Quoted-Printable +BASE64 = 2 # Base64 +SHORTEST = 3 # the shorter of QP and base64, but only for headers + +# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7 +RFC2047_CHROME_LEN = 7 + +DEFAULT_CHARSET = 'us-ascii' +UNKNOWN8BIT = 'unknown-8bit' +EMPTYSTRING = '' + + +# Defaults +CHARSETS = { + # input header enc body enc output conv + 'iso-8859-1': (QP, QP, None), + 'iso-8859-2': (QP, QP, None), + 'iso-8859-3': (QP, 
QP, None), + 'iso-8859-4': (QP, QP, None), + # iso-8859-5 is Cyrillic, and not especially used + # iso-8859-6 is Arabic, also not particularly used + # iso-8859-7 is Greek, QP will not make it readable + # iso-8859-8 is Hebrew, QP will not make it readable + 'iso-8859-9': (QP, QP, None), + 'iso-8859-10': (QP, QP, None), + # iso-8859-11 is Thai, QP will not make it readable + 'iso-8859-13': (QP, QP, None), + 'iso-8859-14': (QP, QP, None), + 'iso-8859-15': (QP, QP, None), + 'iso-8859-16': (QP, QP, None), + 'windows-1252':(QP, QP, None), + 'viscii': (QP, QP, None), + 'us-ascii': (None, None, None), + 'big5': (BASE64, BASE64, None), + 'gb2312': (BASE64, BASE64, None), + 'euc-jp': (BASE64, None, 'iso-2022-jp'), + 'shift_jis': (BASE64, None, 'iso-2022-jp'), + 'iso-2022-jp': (BASE64, None, None), + 'koi8-r': (BASE64, BASE64, None), + 'utf-8': (SHORTEST, BASE64, 'utf-8'), + } + +# Aliases for other commonly-used names for character sets. Map +# them to the real ones used in email. +ALIASES = { + 'latin_1': 'iso-8859-1', + 'latin-1': 'iso-8859-1', + 'latin_2': 'iso-8859-2', + 'latin-2': 'iso-8859-2', + 'latin_3': 'iso-8859-3', + 'latin-3': 'iso-8859-3', + 'latin_4': 'iso-8859-4', + 'latin-4': 'iso-8859-4', + 'latin_5': 'iso-8859-9', + 'latin-5': 'iso-8859-9', + 'latin_6': 'iso-8859-10', + 'latin-6': 'iso-8859-10', + 'latin_7': 'iso-8859-13', + 'latin-7': 'iso-8859-13', + 'latin_8': 'iso-8859-14', + 'latin-8': 'iso-8859-14', + 'latin_9': 'iso-8859-15', + 'latin-9': 'iso-8859-15', + 'latin_10':'iso-8859-16', + 'latin-10':'iso-8859-16', + 'cp949': 'ks_c_5601-1987', + 'euc_jp': 'euc-jp', + 'euc_kr': 'euc-kr', + 'ascii': 'us-ascii', + } + + +# Map charsets to their Unicode codec strings. +CODEC_MAP = { + 'gb2312': 'eucgb2312_cn', + 'big5': 'big5_tw', + # Hack: We don't want *any* conversion for stuff marked us-ascii, as all + # sorts of garbage might be sent to us in the guise of 7-bit us-ascii. + # Let that stuff pass through without conversion to/from Unicode. 
+ 'us-ascii': None, + } + + +# Convenience functions for extending the above mappings +def add_charset(charset, header_enc=None, body_enc=None, output_charset=None): + """Add character set properties to the global registry. + + charset is the input character set, and must be the canonical name of a + character set. + + Optional header_enc and body_enc is either Charset.QP for + quoted-printable, Charset.BASE64 for base64 encoding, Charset.SHORTEST for + the shortest of qp or base64 encoding, or None for no encoding. SHORTEST + is only valid for header_enc. It describes how message headers and + message bodies in the input charset are to be encoded. Default is no + encoding. + + Optional output_charset is the character set that the output should be + in. Conversions will proceed from input charset, to Unicode, to the + output charset when the method Charset.convert() is called. The default + is to output in the same character set as the input. + + Both input_charset and output_charset must have Unicode codec entries in + the module's charset-to-codec mapping; use add_codec(charset, codecname) + to add codecs the module does not know about. See the codecs module's + documentation for more information. + """ + if body_enc == SHORTEST: + raise ValueError('SHORTEST not allowed for body_enc') + CHARSETS[charset] = (header_enc, body_enc, output_charset) + + +def add_alias(alias, canonical): + """Add a character set alias. + + alias is the alias name, e.g. latin-1 + canonical is the character set's canonical name, e.g. iso-8859-1 + """ + ALIASES[alias] = canonical + + +def add_codec(charset, codecname): + """Add a codec that map characters in the given charset to/from Unicode. + + charset is the canonical name of a character set. codecname is the name + of a Python codec, as appropriate for the second argument to the unicode() + built-in, or to the encode() method of a Unicode string. 
+ """ + CODEC_MAP[charset] = codecname + + +# Convenience function for encoding strings, taking into account +# that they might be unknown-8bit (ie: have surrogate-escaped bytes) +def _encode(string, codec): + string = str(string) + if codec == UNKNOWN8BIT: + return string.encode('ascii', 'surrogateescape') + else: + return string.encode(codec) + + +class Charset(object): + """Map character sets to their email properties. + + This class provides information about the requirements imposed on email + for a specific character set. It also provides convenience routines for + converting between character sets, given the availability of the + applicable codecs. Given a character set, it will do its best to provide + information on how to use that character set in an email in an + RFC-compliant way. + + Certain character sets must be encoded with quoted-printable or base64 + when used in email headers or bodies. Certain character sets must be + converted outright, and are not allowed in email. Instances of this + module expose the following information about a character set: + + input_charset: The initial character set specified. Common aliases + are converted to their `official' email names (e.g. latin_1 + is converted to iso-8859-1). Defaults to 7-bit us-ascii. + + header_encoding: If the character set must be encoded before it can be + used in an email header, this attribute will be set to + Charset.QP (for quoted-printable), Charset.BASE64 (for + base64 encoding), or Charset.SHORTEST for the shortest of + QP or BASE64 encoding. Otherwise, it will be None. + + body_encoding: Same as header_encoding, but describes the encoding for the + mail message's body, which indeed may be different than the + header encoding. Charset.SHORTEST is not allowed for + body_encoding. + + output_charset: Some character sets must be converted before they can be + used in email headers or bodies. 
If the input_charset is + one of them, this attribute will contain the name of the + charset output will be converted to. Otherwise, it will + be None. + + input_codec: The name of the Python codec used to convert the + input_charset to Unicode. If no conversion codec is + necessary, this attribute will be None. + + output_codec: The name of the Python codec used to convert Unicode + to the output_charset. If no conversion codec is necessary, + this attribute will have the same value as the input_codec. + """ + def __init__(self, input_charset=DEFAULT_CHARSET): + # RFC 2046, $4.1.2 says charsets are not case sensitive. We coerce to + # unicode because its .lower() is locale insensitive. If the argument + # is already a unicode, we leave it at that, but ensure that the + # charset is ASCII, as the standard (RFC XXX) requires. + try: + if isinstance(input_charset, str): + input_charset.encode('ascii') + else: + input_charset = str(input_charset, 'ascii') + except UnicodeError: + raise errors.CharsetError(input_charset) + input_charset = input_charset.lower() + # Set the input charset after filtering through the aliases + self.input_charset = ALIASES.get(input_charset, input_charset) + # We can try to guess which encoding and conversion to use by the + # charset_map dictionary. Try that first, but let the user override + # it. + henc, benc, conv = CHARSETS.get(self.input_charset, + (SHORTEST, BASE64, None)) + if not conv: + conv = self.input_charset + # Set the attributes, allowing the arguments to override the default. + self.header_encoding = henc + self.body_encoding = benc + self.output_charset = ALIASES.get(conv, conv) + # Now set the codecs. If one isn't defined for input_charset, + # guess and try a Unicode codec with the same name as input_codec. 
+ self.input_codec = CODEC_MAP.get(self.input_charset, + self.input_charset) + self.output_codec = CODEC_MAP.get(self.output_charset, + self.output_charset) + + def __str__(self): + return self.input_charset.lower() + + __repr__ = __str__ + + def __eq__(self, other): + return str(self) == str(other).lower() + + def __ne__(self, other): + return not self.__eq__(other) + + def get_body_encoding(self): + """Return the content-transfer-encoding used for body encoding. + + This is either the string `quoted-printable' or `base64' depending on + the encoding used, or it is a function in which case you should call + the function with a single argument, the Message object being + encoded. The function should then set the Content-Transfer-Encoding + header itself to whatever is appropriate. + + Returns "quoted-printable" if self.body_encoding is QP. + Returns "base64" if self.body_encoding is BASE64. + Returns conversion function otherwise. + """ + assert self.body_encoding != SHORTEST + if self.body_encoding == QP: + return 'quoted-printable' + elif self.body_encoding == BASE64: + return 'base64' + else: + return encode_7or8bit + + def get_output_charset(self): + """Return the output character set. + + This is self.output_charset if that is not None, otherwise it is + self.input_charset. + """ + return self.output_charset or self.input_charset + + def header_encode(self, string): + """Header-encode a string by converting it first to bytes. + + The type of encoding (base64 or quoted-printable) will be based on + this charset's `header_encoding`. + + :param string: A unicode string for the header. It must be possible + to encode this string to bytes using the character set's + output codec. + :return: The encoded string, with RFC 2047 chrome. 
+ """ + codec = self.output_codec or 'us-ascii' + header_bytes = _encode(string, codec) + # 7bit/8bit encodings return the string unchanged (modulo conversions) + encoder_module = self._get_encoder(header_bytes) + if encoder_module is None: + return string + return encoder_module.header_encode(header_bytes, codec) + + def header_encode_lines(self, string, maxlengths): + """Header-encode a string by converting it first to bytes. + + This is similar to `header_encode()` except that the string is fit + into maximum line lengths as given by the argument. + + :param string: A unicode string for the header. It must be possible + to encode this string to bytes using the character set's + output codec. + :param maxlengths: Maximum line length iterator. Each element + returned from this iterator will provide the next maximum line + length. This parameter is used as an argument to built-in next() + and should never be exhausted. The maximum line lengths should + not count the RFC 2047 chrome. These line lengths are only a + hint; the splitter does the best it can. + :return: Lines of encoded strings, each with RFC 2047 chrome. + """ + # See which encoding we should use. + codec = self.output_codec or 'us-ascii' + header_bytes = _encode(string, codec) + encoder_module = self._get_encoder(header_bytes) + encoder = partial(encoder_module.header_encode, charset=codec) + # Calculate the number of characters that the RFC 2047 chrome will + # contribute to each line. + charset = self.get_output_charset() + extra = len(charset) + RFC2047_CHROME_LEN + # Now comes the hard part. We must encode bytes but we can't split on + # bytes because some character sets are variable length and each + # encoded word must stand on its own. So the problem is you have to + # encode to bytes to figure out this word's length, but you must split + # on characters. 
This causes two problems: first, we don't know how + # many octets a specific substring of unicode characters will get + # encoded to, and second, we don't know how many ASCII characters + # those octets will get encoded to. Unless we try it. Which seems + # inefficient. In the interest of being correct rather than fast (and + # in the hope that there will be few encoded headers in any such + # message), brute force it. :( + lines = [] + current_line = [] + maxlen = next(maxlengths) - extra + for character in string: + current_line.append(character) + this_line = EMPTYSTRING.join(current_line) + length = encoder_module.header_length(_encode(this_line, charset)) + if length > maxlen: + # This last character doesn't fit so pop it off. + current_line.pop() + # Does nothing fit on the first line? + if not lines and not current_line: + lines.append(None) + else: + separator = (' ' if lines else '') + joined_line = EMPTYSTRING.join(current_line) + header_bytes = _encode(joined_line, codec) + lines.append(encoder(header_bytes)) + current_line = [character] + maxlen = next(maxlengths) - extra + joined_line = EMPTYSTRING.join(current_line) + header_bytes = _encode(joined_line, codec) + lines.append(encoder(header_bytes)) + return lines + + def _get_encoder(self, header_bytes): + if self.header_encoding == BASE64: + return email.base64mime + elif self.header_encoding == QP: + return email.quoprimime + elif self.header_encoding == SHORTEST: + len64 = email.base64mime.header_length(header_bytes) + lenqp = email.quoprimime.header_length(header_bytes) + if len64 < lenqp: + return email.base64mime + else: + return email.quoprimime + else: + return None + + def body_encode(self, string): + """Body-encode a string by converting it first to bytes. + + The type of encoding (base64 or quoted-printable) will be based on + self.body_encoding. 
If body_encoding is None, we assume the + output charset is a 7bit encoding, so re-encoding the decoded + string using the ascii codec produces the correct string version + of the content. + """ + if not string: + return string + if self.body_encoding is BASE64: + if isinstance(string, str): + string = string.encode(self.output_charset) + return email.base64mime.body_encode(string) + elif self.body_encoding is QP: + # quopromime.body_encode takes a string, but operates on it as if + # it were a list of byte codes. For a (minimal) history on why + # this is so, see changeset 0cf700464177. To correctly encode a + # character set, then, we must turn it into pseudo bytes via the + # latin1 charset, which will encode any byte as a single code point + # between 0 and 255, which is what body_encode is expecting. + if isinstance(string, str): + string = string.encode(self.output_charset) + string = string.decode('latin1') + return email.quoprimime.body_encode(string) + else: + if isinstance(string, str): + string = string.encode(self.output_charset).decode('ascii') + return string diff --git a/src/future/backports/email/encoders.py b/src/future/backports/email/encoders.py new file mode 100644 index 00000000..15d2eb46 --- /dev/null +++ b/src/future/backports/email/encoders.py @@ -0,0 +1,90 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Encodings and related functions.""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from future.builtins import str + +__all__ = [ + 'encode_7or8bit', + 'encode_base64', + 'encode_noop', + 'encode_quopri', + ] + + +try: + from base64 import encodebytes as _bencode +except ImportError: + # Py2 compatibility. TODO: test this! 
+ from base64 import encodestring as _bencode +from quopri import encodestring as _encodestring + + +def _qencode(s): + enc = _encodestring(s, quotetabs=True) + # Must encode spaces, which quopri.encodestring() doesn't do + return enc.replace(' ', '=20') + + +def encode_base64(msg): + """Encode the message's payload in Base64. + + Also, add an appropriate Content-Transfer-Encoding header. + """ + orig = msg.get_payload() + encdata = str(_bencode(orig), 'ascii') + msg.set_payload(encdata) + msg['Content-Transfer-Encoding'] = 'base64' + + +def encode_quopri(msg): + """Encode the message's payload in quoted-printable. + + Also, add an appropriate Content-Transfer-Encoding header. + """ + orig = msg.get_payload() + encdata = _qencode(orig) + msg.set_payload(encdata) + msg['Content-Transfer-Encoding'] = 'quoted-printable' + + +def encode_7or8bit(msg): + """Set the Content-Transfer-Encoding header to 7bit or 8bit.""" + orig = msg.get_payload() + if orig is None: + # There's no payload. For backwards compatibility we use 7bit + msg['Content-Transfer-Encoding'] = '7bit' + return + # We play a trick to make this go fast. If encoding/decode to ASCII + # succeeds, we know the data must be 7bit, otherwise treat it as 8bit. 
+ try: + if isinstance(orig, str): + orig.encode('ascii') + else: + orig.decode('ascii') + except UnicodeError: + charset = msg.get_charset() + output_cset = charset and charset.output_charset + # iso-2022-* is non-ASCII but encodes to a 7-bit representation + if output_cset and output_cset.lower().startswith('iso-2022-'): + msg['Content-Transfer-Encoding'] = '7bit' + else: + msg['Content-Transfer-Encoding'] = '8bit' + else: + msg['Content-Transfer-Encoding'] = '7bit' + if not isinstance(orig, str): + msg.set_payload(orig.decode('ascii', 'surrogateescape')) + + +def encode_noop(msg): + """Do nothing.""" + # Well, not quite *nothing*: in Python3 we have to turn bytes into a string + # in our internal surrogateescaped form in order to keep the model + # consistent. + orig = msg.get_payload() + if not isinstance(orig, str): + msg.set_payload(orig.decode('ascii', 'surrogateescape')) diff --git a/src/future/backports/email/errors.py b/src/future/backports/email/errors.py new file mode 100644 index 00000000..0fe599cf --- /dev/null +++ b/src/future/backports/email/errors.py @@ -0,0 +1,111 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""email package exception classes.""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from future.builtins import super + + +class MessageError(Exception): + """Base class for errors in the email package.""" + + +class MessageParseError(MessageError): + """Base class for message parsing errors.""" + + +class HeaderParseError(MessageParseError): + """Error while parsing headers.""" + + +class BoundaryError(MessageParseError): + """Couldn't find terminating boundary.""" + + +class MultipartConversionError(MessageError, TypeError): + """Conversion to a multipart is prohibited.""" + + +class CharsetError(MessageError): + """An illegal charset was given.""" + + +# These are parsing defects which the parser was 
able to work around. +class MessageDefect(ValueError): + """Base class for a message defect.""" + + def __init__(self, line=None): + if line is not None: + super().__init__(line) + self.line = line + +class NoBoundaryInMultipartDefect(MessageDefect): + """A message claimed to be a multipart but had no boundary parameter.""" + +class StartBoundaryNotFoundDefect(MessageDefect): + """The claimed start boundary was never found.""" + +class CloseBoundaryNotFoundDefect(MessageDefect): + """A start boundary was found, but not the corresponding close boundary.""" + +class FirstHeaderLineIsContinuationDefect(MessageDefect): + """A message had a continuation line as its first header line.""" + +class MisplacedEnvelopeHeaderDefect(MessageDefect): + """A 'Unix-from' header was found in the middle of a header block.""" + +class MissingHeaderBodySeparatorDefect(MessageDefect): + """Found line with no leading whitespace and no colon before blank line.""" +# XXX: backward compatibility, just in case (it was never emitted). +MalformedHeaderDefect = MissingHeaderBodySeparatorDefect + +class MultipartInvariantViolationDefect(MessageDefect): + """A message claimed to be a multipart but no subparts were found.""" + +class InvalidMultipartContentTransferEncodingDefect(MessageDefect): + """An invalid content transfer encoding was set on the multipart itself.""" + +class UndecodableBytesDefect(MessageDefect): + """Header contained bytes that could not be decoded""" + +class InvalidBase64PaddingDefect(MessageDefect): + """base64 encoded sequence had an incorrect length""" + +class InvalidBase64CharactersDefect(MessageDefect): + """base64 encoded sequence had characters not in base64 alphabet""" + +# These errors are specific to header parsing. 
+ +class HeaderDefect(MessageDefect): + """Base class for a header defect.""" + + def __init__(self, *args, **kw): + super().__init__(*args, **kw) + +class InvalidHeaderDefect(HeaderDefect): + """Header is not valid, message gives details.""" + +class HeaderMissingRequiredValue(HeaderDefect): + """A header that must have a value had none""" + +class NonPrintableDefect(HeaderDefect): + """ASCII characters outside the ascii-printable range found""" + + def __init__(self, non_printables): + super().__init__(non_printables) + self.non_printables = non_printables + + def __str__(self): + return ("the following ASCII non-printables found in header: " + "{}".format(self.non_printables)) + +class ObsoleteHeaderDefect(HeaderDefect): + """Header uses syntax declared obsolete by RFC 5322""" + +class NonASCIILocalPartDefect(HeaderDefect): + """local_part contains non-ASCII characters""" + # This defect only occurs during unicode parsing, not when + # parsing messages decoded from binary. diff --git a/src/future/backports/email/feedparser.py b/src/future/backports/email/feedparser.py new file mode 100644 index 00000000..935c26e3 --- /dev/null +++ b/src/future/backports/email/feedparser.py @@ -0,0 +1,525 @@ +# Copyright (C) 2004-2006 Python Software Foundation +# Authors: Baxter, Wouters and Warsaw +# Contact: email-sig@python.org + +"""FeedParser - An email feed parser. + +The feed parser implements an interface for incrementally parsing an email +message, line by line. This has advantages for certain applications, such as +those reading email messages off a socket. + +FeedParser.feed() is the primary interface for pushing new data into the +parser. It returns when there's nothing more it can do with the available +data. When you have no more data to push into the parser, call .close(). +This completes the parsing and returns the root message object. + +The other advantage of this parser is that it will never raise a parsing +exception. 
Instead, when it finds something unexpected, it adds a 'defect' to +the current message. Defects are just instances that live on the message +object's .defects attribute. +""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from future.builtins import object, range, super +from future.utils import implements_iterator, PY3 + +__all__ = ['FeedParser', 'BytesFeedParser'] + +import re + +from future.backports.email import errors +from future.backports.email import message +from future.backports.email._policybase import compat32 + +NLCRE = re.compile('\r\n|\r|\n') +NLCRE_bol = re.compile('(\r\n|\r|\n)') +NLCRE_eol = re.compile('(\r\n|\r|\n)\Z') +NLCRE_crack = re.compile('(\r\n|\r|\n)') +# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character +# except controls, SP, and ":". +headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])') +EMPTYSTRING = '' +NL = '\n' + +NeedMoreData = object() + + +# @implements_iterator +class BufferedSubFile(object): + """A file-ish object that can have new data loaded into it. + + You can also push and pop line-matching predicates onto a stack. When the + current predicate matches the current line, a false EOF response + (i.e. empty string) is returned instead. This lets the parser adhere to a + simple abstraction -- it parses until EOF closes the current message. + """ + def __init__(self): + # The last partial line pushed into this object. + self._partial = '' + # The list of full, pushed lines, in reverse order + self._lines = [] + # The stack of false-EOF checking predicates. + self._eofstack = [] + # A flag indicating whether the file has been closed or not. + self._closed = False + + def push_eof_matcher(self, pred): + self._eofstack.append(pred) + + def pop_eof_matcher(self): + return self._eofstack.pop() + + def close(self): + # Don't forget any trailing partial line. 
+ self._lines.append(self._partial) + self._partial = '' + self._closed = True + + def readline(self): + if not self._lines: + if self._closed: + return '' + return NeedMoreData + # Pop the line off the stack and see if it matches the current + # false-EOF predicate. + line = self._lines.pop() + # RFC 2046, section 5.1.2 requires us to recognize outer level + # boundaries at any level of inner nesting. Do this, but be sure it's + # in the order of most to least nested. + for ateof in self._eofstack[::-1]: + if ateof(line): + # We're at the false EOF. But push the last line back first. + self._lines.append(line) + return '' + return line + + def unreadline(self, line): + # Let the consumer push a line back into the buffer. + assert line is not NeedMoreData + self._lines.append(line) + + def push(self, data): + """Push some new data into this object.""" + # Handle any previous leftovers + data, self._partial = self._partial + data, '' + # Crack into lines, but preserve the newlines on the end of each + parts = NLCRE_crack.split(data) + # The *ahem* interesting behaviour of re.split when supplied grouping + # parentheses is that the last element of the resulting list is the + # data after the final RE. In the case of a NL/CR terminated string, + # this is the empty string. + self._partial = parts.pop() + #GAN 29Mar09 bugs 1555570, 1721862 Confusion at 8K boundary ending with \r: + # is there a \n to follow later? + if not self._partial and parts and parts[-1].endswith('\r'): + self._partial = parts.pop(-2)+parts.pop() + # parts is a list of strings, alternating between the line contents + # and the eol character(s). Gather up a list of lines after + # re-attaching the newlines. + lines = [] + for i in range(len(parts) // 2): + lines.append(parts[i*2] + parts[i*2+1]) + self.pushlines(lines) + + def pushlines(self, lines): + # Reverse and insert at the front of the lines. 
+ self._lines[:0] = lines[::-1] + + def __iter__(self): + return self + + def __next__(self): + line = self.readline() + if line == '': + raise StopIteration + return line + + +class FeedParser(object): + """A feed-style parser of email.""" + + def __init__(self, _factory=message.Message, **_3to2kwargs): + if 'policy' in _3to2kwargs: policy = _3to2kwargs['policy']; del _3to2kwargs['policy'] + else: policy = compat32 + """_factory is called with no arguments to create a new message obj + + The policy keyword specifies a policy object that controls a number of + aspects of the parser's operation. The default policy maintains + backward compatibility. + + """ + self._factory = _factory + self.policy = policy + try: + _factory(policy=self.policy) + self._factory_kwds = lambda: {'policy': self.policy} + except TypeError: + # Assume this is an old-style factory + self._factory_kwds = lambda: {} + self._input = BufferedSubFile() + self._msgstack = [] + if PY3: + self._parse = self._parsegen().__next__ + else: + self._parse = self._parsegen().next + self._cur = None + self._last = None + self._headersonly = False + + # Non-public interface for supporting Parser's headersonly flag + def _set_headersonly(self): + self._headersonly = True + + def feed(self, data): + """Push more data into the parser.""" + self._input.push(data) + self._call_parse() + + def _call_parse(self): + try: + self._parse() + except StopIteration: + pass + + def close(self): + """Parse all remaining data and return the root message object.""" + self._input.close() + self._call_parse() + root = self._pop_message() + assert not self._msgstack + # Look for final set of defects + if root.get_content_maintype() == 'multipart' \ + and not root.is_multipart(): + defect = errors.MultipartInvariantViolationDefect() + self.policy.handle_defect(root, defect) + return root + + def _new_message(self): + msg = self._factory(**self._factory_kwds()) + if self._cur and self._cur.get_content_type() == 
'multipart/digest': + msg.set_default_type('message/rfc822') + if self._msgstack: + self._msgstack[-1].attach(msg) + self._msgstack.append(msg) + self._cur = msg + self._last = msg + + def _pop_message(self): + retval = self._msgstack.pop() + if self._msgstack: + self._cur = self._msgstack[-1] + else: + self._cur = None + return retval + + def _parsegen(self): + # Create a new message and start by parsing headers. + self._new_message() + headers = [] + # Collect the headers, searching for a line that doesn't match the RFC + # 2822 header or continuation pattern (including an empty line). + for line in self._input: + if line is NeedMoreData: + yield NeedMoreData + continue + if not headerRE.match(line): + # If we saw the RFC defined header/body separator + # (i.e. newline), just throw it away. Otherwise the line is + # part of the body so push it back. + if not NLCRE.match(line): + defect = errors.MissingHeaderBodySeparatorDefect() + self.policy.handle_defect(self._cur, defect) + self._input.unreadline(line) + break + headers.append(line) + # Done with the headers, so parse them and figure out what we're + # supposed to see in the body of the message. + self._parse_headers(headers) + # Headers-only parsing is a backwards compatibility hack, which was + # necessary in the older parser, which could raise errors. All + # remaining lines in the input are thrown into the message body. + if self._headersonly: + lines = [] + while True: + line = self._input.readline() + if line is NeedMoreData: + yield NeedMoreData + continue + if line == '': + break + lines.append(line) + self._cur.set_payload(EMPTYSTRING.join(lines)) + return + if self._cur.get_content_type() == 'message/delivery-status': + # message/delivery-status contains blocks of headers separated by + # a blank line. We'll represent each header block as a separate + # nested message object, but the processing is a bit different + # than standard message/* types because there is no body for the + # nested messages. 
A blank line separates the subparts. + while True: + self._input.push_eof_matcher(NLCRE.match) + for retval in self._parsegen(): + if retval is NeedMoreData: + yield NeedMoreData + continue + break + msg = self._pop_message() + # We need to pop the EOF matcher in order to tell if we're at + # the end of the current file, not the end of the last block + # of message headers. + self._input.pop_eof_matcher() + # The input stream must be sitting at the newline or at the + # EOF. We want to see if we're at the end of this subpart, so + # first consume the blank line, then test the next line to see + # if we're at this subpart's EOF. + while True: + line = self._input.readline() + if line is NeedMoreData: + yield NeedMoreData + continue + break + while True: + line = self._input.readline() + if line is NeedMoreData: + yield NeedMoreData + continue + break + if line == '': + break + # Not at EOF so this is a line we're going to need. + self._input.unreadline(line) + return + if self._cur.get_content_maintype() == 'message': + # The message claims to be a message/* type, then what follows is + # another RFC 2822 message. + for retval in self._parsegen(): + if retval is NeedMoreData: + yield NeedMoreData + continue + break + self._pop_message() + return + if self._cur.get_content_maintype() == 'multipart': + boundary = self._cur.get_boundary() + if boundary is None: + # The message /claims/ to be a multipart but it has not + # defined a boundary. That's a problem which we'll handle by + # reading everything until the EOF and marking the message as + # defective. + defect = errors.NoBoundaryInMultipartDefect() + self.policy.handle_defect(self._cur, defect) + lines = [] + for line in self._input: + if line is NeedMoreData: + yield NeedMoreData + continue + lines.append(line) + self._cur.set_payload(EMPTYSTRING.join(lines)) + return + # Make sure a valid content type was specified per RFC 2045:6.4. 
+ if (self._cur.get('content-transfer-encoding', '8bit').lower() + not in ('7bit', '8bit', 'binary')): + defect = errors.InvalidMultipartContentTransferEncodingDefect() + self.policy.handle_defect(self._cur, defect) + # Create a line match predicate which matches the inter-part + # boundary as well as the end-of-multipart boundary. Don't push + # this onto the input stream until we've scanned past the + # preamble. + separator = '--' + boundary + boundaryre = re.compile( + '(?P' + re.escape(separator) + + r')(?P--)?(?P[ \t]*)(?P\r\n|\r|\n)?$') + capturing_preamble = True + preamble = [] + linesep = False + close_boundary_seen = False + while True: + line = self._input.readline() + if line is NeedMoreData: + yield NeedMoreData + continue + if line == '': + break + mo = boundaryre.match(line) + if mo: + # If we're looking at the end boundary, we're done with + # this multipart. If there was a newline at the end of + # the closing boundary, then we need to initialize the + # epilogue with the empty string (see below). + if mo.group('end'): + close_boundary_seen = True + linesep = mo.group('linesep') + break + # We saw an inter-part boundary. Were we in the preamble? + if capturing_preamble: + if preamble: + # According to RFC 2046, the last newline belongs + # to the boundary. + lastline = preamble[-1] + eolmo = NLCRE_eol.search(lastline) + if eolmo: + preamble[-1] = lastline[:-len(eolmo.group(0))] + self._cur.preamble = EMPTYSTRING.join(preamble) + capturing_preamble = False + self._input.unreadline(line) + continue + # We saw a boundary separating two parts. Consume any + # multiple boundary lines that may be following. Our + # interpretation of RFC 2046 BNF grammar does not produce + # body parts within such double boundaries. 
+ while True: + line = self._input.readline() + if line is NeedMoreData: + yield NeedMoreData + continue + mo = boundaryre.match(line) + if not mo: + self._input.unreadline(line) + break + # Recurse to parse this subpart; the input stream points + # at the subpart's first line. + self._input.push_eof_matcher(boundaryre.match) + for retval in self._parsegen(): + if retval is NeedMoreData: + yield NeedMoreData + continue + break + # Because of RFC 2046, the newline preceding the boundary + # separator actually belongs to the boundary, not the + # previous subpart's payload (or epilogue if the previous + # part is a multipart). + if self._last.get_content_maintype() == 'multipart': + epilogue = self._last.epilogue + if epilogue == '': + self._last.epilogue = None + elif epilogue is not None: + mo = NLCRE_eol.search(epilogue) + if mo: + end = len(mo.group(0)) + self._last.epilogue = epilogue[:-end] + else: + payload = self._last._payload + if isinstance(payload, str): + mo = NLCRE_eol.search(payload) + if mo: + payload = payload[:-len(mo.group(0))] + self._last._payload = payload + self._input.pop_eof_matcher() + self._pop_message() + # Set the multipart up for newline cleansing, which will + # happen if we're in a nested multipart. + self._last = self._cur + else: + # I think we must be in the preamble + assert capturing_preamble + preamble.append(line) + # We've seen either the EOF or the end boundary. If we're still + # capturing the preamble, we never saw the start boundary. Note + # that as a defect and store the captured text as the payload. 
+ if capturing_preamble: + defect = errors.StartBoundaryNotFoundDefect() + self.policy.handle_defect(self._cur, defect) + self._cur.set_payload(EMPTYSTRING.join(preamble)) + epilogue = [] + for line in self._input: + if line is NeedMoreData: + yield NeedMoreData + continue + self._cur.epilogue = EMPTYSTRING.join(epilogue) + return + # If we're not processing the preamble, then we might have seen + # EOF without seeing that end boundary...that is also a defect. + if not close_boundary_seen: + defect = errors.CloseBoundaryNotFoundDefect() + self.policy.handle_defect(self._cur, defect) + return + # Everything from here to the EOF is epilogue. If the end boundary + # ended in a newline, we'll need to make sure the epilogue isn't + # None + if linesep: + epilogue = [''] + else: + epilogue = [] + for line in self._input: + if line is NeedMoreData: + yield NeedMoreData + continue + epilogue.append(line) + # Any CRLF at the front of the epilogue is not technically part of + # the epilogue. Also, watch out for an empty string epilogue, + # which means a single newline. + if epilogue: + firstline = epilogue[0] + bolmo = NLCRE_bol.match(firstline) + if bolmo: + epilogue[0] = firstline[len(bolmo.group(0)):] + self._cur.epilogue = EMPTYSTRING.join(epilogue) + return + # Otherwise, it's some non-multipart type, so the entire rest of the + # file contents becomes the payload. + lines = [] + for line in self._input: + if line is NeedMoreData: + yield NeedMoreData + continue + lines.append(line) + self._cur.set_payload(EMPTYSTRING.join(lines)) + + def _parse_headers(self, lines): + # Passed a list of lines that make up the headers for the current msg + lastheader = '' + lastvalue = [] + for lineno, line in enumerate(lines): + # Check for continuation + if line[0] in ' \t': + if not lastheader: + # The first line of the headers was a continuation. This + # is illegal, so let's note the defect, store the illegal + # line, and ignore it for purposes of headers. 
+ defect = errors.FirstHeaderLineIsContinuationDefect(line) + self.policy.handle_defect(self._cur, defect) + continue + lastvalue.append(line) + continue + if lastheader: + self._cur.set_raw(*self.policy.header_source_parse(lastvalue)) + lastheader, lastvalue = '', [] + # Check for envelope header, i.e. unix-from + if line.startswith('From '): + if lineno == 0: + # Strip off the trailing newline + mo = NLCRE_eol.search(line) + if mo: + line = line[:-len(mo.group(0))] + self._cur.set_unixfrom(line) + continue + elif lineno == len(lines) - 1: + # Something looking like a unix-from at the end - it's + # probably the first line of the body, so push back the + # line and stop. + self._input.unreadline(line) + return + else: + # Weirdly placed unix-from line. Note this as a defect + # and ignore it. + defect = errors.MisplacedEnvelopeHeaderDefect(line) + self._cur.defects.append(defect) + continue + # Split the line on the colon separating field name from value. + # There will always be a colon, because if there wasn't the part of + # the parser that calls us would have started parsing the body. + i = line.find(':') + assert i>0, "_parse_headers fed line with no : and no leading WS" + lastheader = line[:i] + lastvalue = [line] + # Done with all the lines, so handle the last header. 
+ if lastheader: + self._cur.set_raw(*self.policy.header_source_parse(lastvalue)) + + +class BytesFeedParser(FeedParser): + """Like FeedParser, but feed accepts bytes.""" + + def feed(self, data): + super().feed(data.decode('ascii', 'surrogateescape')) diff --git a/src/future/backports/email/generator.py b/src/future/backports/email/generator.py new file mode 100644 index 00000000..53493d0a --- /dev/null +++ b/src/future/backports/email/generator.py @@ -0,0 +1,498 @@ +# Copyright (C) 2001-2010 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Classes to generate plain text from a message object tree.""" +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from future.builtins import super +from future.builtins import str + +__all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator'] + +import re +import sys +import time +import random +import warnings + +from io import StringIO, BytesIO +from future.backports.email._policybase import compat32 +from future.backports.email.header import Header +from future.backports.email.utils import _has_surrogates +import future.backports.email.charset as _charset + +UNDERSCORE = '_' +NL = '\n' # XXX: no longer used by the code below. + +fcre = re.compile(r'^From ', re.MULTILINE) + + +class Generator(object): + """Generates output from a Message object tree. + + This basic generator writes the message to the given file object as plain + text. + """ + # + # Public interface + # + + def __init__(self, outfp, mangle_from_=True, maxheaderlen=None, **_3to2kwargs): + if 'policy' in _3to2kwargs: policy = _3to2kwargs['policy']; del _3to2kwargs['policy'] + else: policy = None + """Create the generator for message flattening. + + outfp is the output file-like object for writing the message to. It + must have a write() method. 
+ + Optional mangle_from_ is a flag that, when True (the default), escapes + From_ lines in the body of the message by putting a `>' in front of + them. + + Optional maxheaderlen specifies the longest length for a non-continued + header. When a header line is longer (in characters, with tabs + expanded to 8 spaces) than maxheaderlen, the header will split as + defined in the Header class. Set maxheaderlen to zero to disable + header wrapping. The default is 78, as recommended (but not required) + by RFC 2822. + + The policy keyword specifies a policy object that controls a number of + aspects of the generator's operation. The default policy maintains + backward compatibility. + + """ + self._fp = outfp + self._mangle_from_ = mangle_from_ + self.maxheaderlen = maxheaderlen + self.policy = policy + + def write(self, s): + # Just delegate to the file object + self._fp.write(s) + + def flatten(self, msg, unixfrom=False, linesep=None): + r"""Print the message object tree rooted at msg to the output file + specified when the Generator instance was created. + + unixfrom is a flag that forces the printing of a Unix From_ delimiter + before the first object in the message tree. If the original message + has no From_ delimiter, a `standard' one is crafted. By default, this + is False to inhibit the printing of any From_ delimiter. + + Note that for subobjects, no From_ line is printed. + + linesep specifies the characters used to indicate a new line in + the output. The default value is determined by the policy. + + """ + # We use the _XXX constants for operating on data that comes directly + # from the msg, and _encoded_XXX constants for operating on data that + # has already been converted (to bytes in the BytesGenerator) and + # inserted into a temporary buffer. 
+ policy = msg.policy if self.policy is None else self.policy + if linesep is not None: + policy = policy.clone(linesep=linesep) + if self.maxheaderlen is not None: + policy = policy.clone(max_line_length=self.maxheaderlen) + self._NL = policy.linesep + self._encoded_NL = self._encode(self._NL) + self._EMPTY = '' + self._encoded_EMTPY = self._encode('') + # Because we use clone (below) when we recursively process message + # subparts, and because clone uses the computed policy (not None), + # submessages will automatically get set to the computed policy when + # they are processed by this code. + old_gen_policy = self.policy + old_msg_policy = msg.policy + try: + self.policy = policy + msg.policy = policy + if unixfrom: + ufrom = msg.get_unixfrom() + if not ufrom: + ufrom = 'From nobody ' + time.ctime(time.time()) + self.write(ufrom + self._NL) + self._write(msg) + finally: + self.policy = old_gen_policy + msg.policy = old_msg_policy + + def clone(self, fp): + """Clone this generator with the exact same options.""" + return self.__class__(fp, + self._mangle_from_, + None, # Use policy setting, which we've adjusted + policy=self.policy) + + # + # Protected interface - undocumented ;/ + # + + # Note that we use 'self.write' when what we are writing is coming from + # the source, and self._fp.write when what we are writing is coming from a + # buffer (because the Bytes subclass has already had a chance to transform + # the data in its write method in that case). This is an entirely + # pragmatic split determined by experiment; we could be more general by + # always using write and having the Bytes subclass write method detect when + # it has already transformed the input; but, since this whole thing is a + # hack anyway this seems good enough. + + # Similarly, we have _XXX and _encoded_XXX attributes that are used on + # source and buffer data, respectively. + _encoded_EMPTY = '' + + def _new_buffer(self): + # BytesGenerator overrides this to return BytesIO. 
+ return StringIO() + + def _encode(self, s): + # BytesGenerator overrides this to encode strings to bytes. + return s + + def _write_lines(self, lines): + # We have to transform the line endings. + if not lines: + return + lines = lines.splitlines(True) + for line in lines[:-1]: + self.write(line.rstrip('\r\n')) + self.write(self._NL) + laststripped = lines[-1].rstrip('\r\n') + self.write(laststripped) + if len(lines[-1]) != len(laststripped): + self.write(self._NL) + + def _write(self, msg): + # We can't write the headers yet because of the following scenario: + # say a multipart message includes the boundary string somewhere in + # its body. We'd have to calculate the new boundary /before/ we write + # the headers so that we can write the correct Content-Type: + # parameter. + # + # The way we do this, so as to make the _handle_*() methods simpler, + # is to cache any subpart writes into a buffer. The we write the + # headers and the buffer contents. That way, subpart handlers can + # Do The Right Thing, and can still modify the Content-Type: header if + # necessary. + oldfp = self._fp + try: + self._fp = sfp = self._new_buffer() + self._dispatch(msg) + finally: + self._fp = oldfp + # Write the headers. First we see if the message object wants to + # handle that itself. If not, we'll do it generically. + meth = getattr(msg, '_write_headers', None) + if meth is None: + self._write_headers(msg) + else: + meth(self) + self._fp.write(sfp.getvalue()) + + def _dispatch(self, msg): + # Get the Content-Type: for the message, then try to dispatch to + # self._handle__(). If there's no handler for the + # full MIME type, then dispatch to self._handle_(). If + # that's missing too, then dispatch to self._writeBody(). 
+ main = msg.get_content_maintype() + sub = msg.get_content_subtype() + specific = UNDERSCORE.join((main, sub)).replace('-', '_') + meth = getattr(self, '_handle_' + specific, None) + if meth is None: + generic = main.replace('-', '_') + meth = getattr(self, '_handle_' + generic, None) + if meth is None: + meth = self._writeBody + meth(msg) + + # + # Default handlers + # + + def _write_headers(self, msg): + for h, v in msg.raw_items(): + self.write(self.policy.fold(h, v)) + # A blank line always separates headers from body + self.write(self._NL) + + # + # Handlers for writing types and subtypes + # + + def _handle_text(self, msg): + payload = msg.get_payload() + if payload is None: + return + if not isinstance(payload, str): + raise TypeError('string payload expected: %s' % type(payload)) + if _has_surrogates(msg._payload): + charset = msg.get_param('charset') + if charset is not None: + del msg['content-transfer-encoding'] + msg.set_payload(payload, charset) + payload = msg.get_payload() + if self._mangle_from_: + payload = fcre.sub('>From ', payload) + self._write_lines(payload) + + # Default body handler + _writeBody = _handle_text + + def _handle_multipart(self, msg): + # The trick here is to write out each part separately, merge them all + # together, and then make sure that the boundary we've chosen isn't + # present in the payload. + msgtexts = [] + subparts = msg.get_payload() + if subparts is None: + subparts = [] + elif isinstance(subparts, str): + # e.g. a non-strict parse of a message with no starting boundary. + self.write(subparts) + return + elif not isinstance(subparts, list): + # Scalar payload + subparts = [subparts] + for part in subparts: + s = self._new_buffer() + g = self.clone(s) + g.flatten(part, unixfrom=False, linesep=self._NL) + msgtexts.append(s.getvalue()) + # BAW: What about boundaries that are wrapped in double-quotes? 
+ boundary = msg.get_boundary() + if not boundary: + # Create a boundary that doesn't appear in any of the + # message texts. + alltext = self._encoded_NL.join(msgtexts) + boundary = self._make_boundary(alltext) + msg.set_boundary(boundary) + # If there's a preamble, write it out, with a trailing CRLF + if msg.preamble is not None: + if self._mangle_from_: + preamble = fcre.sub('>From ', msg.preamble) + else: + preamble = msg.preamble + self._write_lines(preamble) + self.write(self._NL) + # dash-boundary transport-padding CRLF + self.write('--' + boundary + self._NL) + # body-part + if msgtexts: + self._fp.write(msgtexts.pop(0)) + # *encapsulation + # --> delimiter transport-padding + # --> CRLF body-part + for body_part in msgtexts: + # delimiter transport-padding CRLF + self.write(self._NL + '--' + boundary + self._NL) + # body-part + self._fp.write(body_part) + # close-delimiter transport-padding + self.write(self._NL + '--' + boundary + '--') + if msg.epilogue is not None: + self.write(self._NL) + if self._mangle_from_: + epilogue = fcre.sub('>From ', msg.epilogue) + else: + epilogue = msg.epilogue + self._write_lines(epilogue) + + def _handle_multipart_signed(self, msg): + # The contents of signed parts has to stay unmodified in order to keep + # the signature intact per RFC1847 2.1, so we disable header wrapping. + # RDM: This isn't enough to completely preserve the part, but it helps. + p = self.policy + self.policy = p.clone(max_line_length=0) + try: + self._handle_multipart(msg) + finally: + self.policy = p + + def _handle_message_delivery_status(self, msg): + # We can't just write the headers directly to self's file object + # because this will leave an extra newline between the last header + # block and the boundary. Sigh. 
+ blocks = [] + for part in msg.get_payload(): + s = self._new_buffer() + g = self.clone(s) + g.flatten(part, unixfrom=False, linesep=self._NL) + text = s.getvalue() + lines = text.split(self._encoded_NL) + # Strip off the unnecessary trailing empty line + if lines and lines[-1] == self._encoded_EMPTY: + blocks.append(self._encoded_NL.join(lines[:-1])) + else: + blocks.append(text) + # Now join all the blocks with an empty line. This has the lovely + # effect of separating each block with an empty line, but not adding + # an extra one after the last one. + self._fp.write(self._encoded_NL.join(blocks)) + + def _handle_message(self, msg): + s = self._new_buffer() + g = self.clone(s) + # The payload of a message/rfc822 part should be a multipart sequence + # of length 1. The zeroth element of the list should be the Message + # object for the subpart. Extract that object, stringify it, and + # write it out. + # Except, it turns out, when it's a string instead, which happens when + # and only when HeaderParser is used on a message of mime type + # message/rfc822. Such messages are generated by, for example, + # Groupwise when forwarding unadorned messages. (Issue 7970.) So + # in that case we just emit the string body. + payload = msg._payload + if isinstance(payload, list): + g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL) + payload = s.getvalue() + else: + payload = self._encode(payload) + self._fp.write(payload) + + # This used to be a module level function; we use a classmethod for this + # and _compile_re so we can continue to provide the module level function + # for backward compatibility by doing + # _make_boudary = Generator._make_boundary + # at the end of the module. It *is* internal, so we could drop that... + @classmethod + def _make_boundary(cls, text=None): + # Craft a random boundary. If text is given, ensure that the chosen + # boundary doesn't appear in the text. 
+ token = random.randrange(sys.maxsize) + boundary = ('=' * 15) + (_fmt % token) + '==' + if text is None: + return boundary + b = boundary + counter = 0 + while True: + cre = cls._compile_re('^--' + re.escape(b) + '(--)?$', re.MULTILINE) + if not cre.search(text): + break + b = boundary + '.' + str(counter) + counter += 1 + return b + + @classmethod + def _compile_re(cls, s, flags): + return re.compile(s, flags) + +class BytesGenerator(Generator): + """Generates a bytes version of a Message object tree. + + Functionally identical to the base Generator except that the output is + bytes and not string. When surrogates were used in the input to encode + bytes, these are decoded back to bytes for output. If the policy has + cte_type set to 7bit, then the message is transformed such that the + non-ASCII bytes are properly content transfer encoded, using the charset + unknown-8bit. + + The outfp object must accept bytes in its write method. + """ + + # Bytes versions of this constant for use in manipulating data from + # the BytesIO buffer. + _encoded_EMPTY = b'' + + def write(self, s): + self._fp.write(str(s).encode('ascii', 'surrogateescape')) + + def _new_buffer(self): + return BytesIO() + + def _encode(self, s): + return s.encode('ascii') + + def _write_headers(self, msg): + # This is almost the same as the string version, except for handling + # strings with 8bit bytes. + for h, v in msg.raw_items(): + self._fp.write(self.policy.fold_binary(h, v)) + # A blank line always separates headers from body + self.write(self._NL) + + def _handle_text(self, msg): + # If the string has surrogates the original source was bytes, so + # just write it back out. 
+ if msg._payload is None: + return + if _has_surrogates(msg._payload) and not self.policy.cte_type=='7bit': + if self._mangle_from_: + msg._payload = fcre.sub(">From ", msg._payload) + self._write_lines(msg._payload) + else: + super(BytesGenerator,self)._handle_text(msg) + + # Default body handler + _writeBody = _handle_text + + @classmethod + def _compile_re(cls, s, flags): + return re.compile(s.encode('ascii'), flags) + + +_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]' + +class DecodedGenerator(Generator): + """Generates a text representation of a message. + + Like the Generator base class, except that non-text parts are substituted + with a format string representing the part. + """ + def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None): + """Like Generator.__init__() except that an additional optional + argument is allowed. + + Walks through all subparts of a message. If the subpart is of main + type `text', then it prints the decoded payload of the subpart. + + Otherwise, fmt is a format string that is used instead of the message + payload. 
fmt is expanded with the following keywords (in + %(keyword)s format): + + type : Full MIME type of the non-text part + maintype : Main MIME type of the non-text part + subtype : Sub-MIME type of the non-text part + filename : Filename of the non-text part + description: Description associated with the non-text part + encoding : Content transfer encoding of the non-text part + + The default value for fmt is None, meaning + + [Non-text (%(type)s) part of message omitted, filename %(filename)s] + """ + Generator.__init__(self, outfp, mangle_from_, maxheaderlen) + if fmt is None: + self._fmt = _FMT + else: + self._fmt = fmt + + def _dispatch(self, msg): + for part in msg.walk(): + maintype = part.get_content_maintype() + if maintype == 'text': + print(part.get_payload(decode=False), file=self) + elif maintype == 'multipart': + # Just skip this + pass + else: + print(self._fmt % { + 'type' : part.get_content_type(), + 'maintype' : part.get_content_maintype(), + 'subtype' : part.get_content_subtype(), + 'filename' : part.get_filename('[no filename]'), + 'description': part.get('Content-Description', + '[no description]'), + 'encoding' : part.get('Content-Transfer-Encoding', + '[no encoding]'), + }, file=self) + + +# Helper used by Generator._make_boundary +_width = len(repr(sys.maxsize-1)) +_fmt = '%%0%dd' % _width + +# Backward compatibility +_make_boundary = Generator._make_boundary diff --git a/src/future/backports/email/header.py b/src/future/backports/email/header.py new file mode 100644 index 00000000..63bf038c --- /dev/null +++ b/src/future/backports/email/header.py @@ -0,0 +1,581 @@ +# Copyright (C) 2002-2007 Python Software Foundation +# Author: Ben Gertzfield, Barry Warsaw +# Contact: email-sig@python.org + +"""Header encoding and decoding functionality.""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from future.builtins import bytes, range, str, super, zip + +__all__ = [ + 'Header', + 
'decode_header', + 'make_header', + ] + +import re +import binascii + +from future.backports import email +from future.backports.email import base64mime +from future.backports.email.errors import HeaderParseError +import future.backports.email.charset as _charset + +# Helpers +from future.backports.email.quoprimime import _max_append, header_decode + +Charset = _charset.Charset + +NL = '\n' +SPACE = ' ' +BSPACE = b' ' +SPACE8 = ' ' * 8 +EMPTYSTRING = '' +MAXLINELEN = 78 +FWS = ' \t' + +USASCII = Charset('us-ascii') +UTF8 = Charset('utf-8') + +# Match encoded-word strings in the form =?charset?q?Hello_World?= +ecre = re.compile(r''' + =\? # literal =? + (?P[^?]*?) # non-greedy up to the next ? is the charset + \? # literal ? + (?P[qb]) # either a "q" or a "b", case insensitive + \? # literal ? + (?P.*?) # non-greedy up to the next ?= is the encoded string + \?= # literal ?= + ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE) + +# Field name regexp, including trailing colon, but not separating whitespace, +# according to RFC 2822. Character range is from tilde to exclamation mark. +# For use with .match() +fcre = re.compile(r'[\041-\176]+:$') + +# Find a header embedded in a putative header value. Used to check for +# header injection attack. +_embeded_header = re.compile(r'\n[^ \t]+:') + + +def decode_header(header): + """Decode a message header value without converting charset. + + Returns a list of (string, charset) pairs containing each of the decoded + parts of the header. Charset is None for non-encoded parts of the header, + otherwise a lower-case string containing the name of the character set + specified in the encoded string. + + header may be a string that may or may not contain RFC2047 encoded words, + or it may be a Header object. + + An email.errors.HeaderParseError may be raised when certain decoding error + occurs (e.g. a base64 decoding exception). + """ + # If it is a Header object, we can just return the encoded chunks. 
+ if hasattr(header, '_chunks'): + return [(_charset._encode(string, str(charset)), str(charset)) + for string, charset in header._chunks] + # If no encoding, just return the header with no charset. + if not ecre.search(header): + return [(header, None)] + # First step is to parse all the encoded parts into triplets of the form + # (encoded_string, encoding, charset). For unencoded strings, the last + # two parts will be None. + words = [] + for line in header.splitlines(): + parts = ecre.split(line) + first = True + while parts: + unencoded = parts.pop(0) + if first: + unencoded = unencoded.lstrip() + first = False + if unencoded: + words.append((unencoded, None, None)) + if parts: + charset = parts.pop(0).lower() + encoding = parts.pop(0).lower() + encoded = parts.pop(0) + words.append((encoded, encoding, charset)) + # Now loop over words and remove words that consist of whitespace + # between two encoded strings. + import sys + droplist = [] + for n, w in enumerate(words): + if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace(): + droplist.append(n-1) + for d in reversed(droplist): + del words[d] + + # The next step is to decode each encoded word by applying the reverse + # base64 or quopri transformation. decoded_words is now a list of the + # form (decoded_word, charset). + decoded_words = [] + for encoded_string, encoding, charset in words: + if encoding is None: + # This is an unencoded word. 
+ decoded_words.append((encoded_string, charset)) + elif encoding == 'q': + word = header_decode(encoded_string) + decoded_words.append((word, charset)) + elif encoding == 'b': + paderr = len(encoded_string) % 4 # Postel's law: add missing padding + if paderr: + encoded_string += '==='[:4 - paderr] + try: + word = base64mime.decode(encoded_string) + except binascii.Error: + raise HeaderParseError('Base64 decoding error') + else: + decoded_words.append((word, charset)) + else: + raise AssertionError('Unexpected encoding: ' + encoding) + # Now convert all words to bytes and collapse consecutive runs of + # similarly encoded words. + collapsed = [] + last_word = last_charset = None + for word, charset in decoded_words: + if isinstance(word, str): + word = bytes(word, 'raw-unicode-escape') + if last_word is None: + last_word = word + last_charset = charset + elif charset != last_charset: + collapsed.append((last_word, last_charset)) + last_word = word + last_charset = charset + elif last_charset is None: + last_word += BSPACE + word + else: + last_word += word + collapsed.append((last_word, last_charset)) + return collapsed + + +def make_header(decoded_seq, maxlinelen=None, header_name=None, + continuation_ws=' '): + """Create a Header from a sequence of pairs as returned by decode_header() + + decode_header() takes a header value string and returns a sequence of + pairs of the format (decoded_string, charset) where charset is the string + name of the character set. + + This function takes one of those sequence of pairs and returns a Header + instance. Optional maxlinelen, header_name, and continuation_ws are as in + the Header constructor. 
+ """ + h = Header(maxlinelen=maxlinelen, header_name=header_name, + continuation_ws=continuation_ws) + for s, charset in decoded_seq: + # None means us-ascii but we can simply pass it on to h.append() + if charset is not None and not isinstance(charset, Charset): + charset = Charset(charset) + h.append(s, charset) + return h + + +class Header(object): + def __init__(self, s=None, charset=None, + maxlinelen=None, header_name=None, + continuation_ws=' ', errors='strict'): + """Create a MIME-compliant header that can contain many character sets. + + Optional s is the initial header value. If None, the initial header + value is not set. You can later append to the header with .append() + method calls. s may be a byte string or a Unicode string, but see the + .append() documentation for semantics. + + Optional charset serves two purposes: it has the same meaning as the + charset argument to the .append() method. It also sets the default + character set for all subsequent .append() calls that omit the charset + argument. If charset is not provided in the constructor, the us-ascii + charset is used both as s's initial charset and as the default for + subsequent .append() calls. + + The maximum line length can be specified explicitly via maxlinelen. For + splitting the first line to a shorter value (to account for the field + header which isn't included in s, e.g. `Subject') pass in the name of + the field in header_name. The default maxlinelen is 78 as recommended + by RFC 2822. + + continuation_ws must be RFC 2822 compliant folding whitespace (usually + either a space or a hard tab) which will be prepended to continuation + lines. + + errors is passed through to the .append() call. 
+ """ + if charset is None: + charset = USASCII + elif not isinstance(charset, Charset): + charset = Charset(charset) + self._charset = charset + self._continuation_ws = continuation_ws + self._chunks = [] + if s is not None: + self.append(s, charset, errors) + if maxlinelen is None: + maxlinelen = MAXLINELEN + self._maxlinelen = maxlinelen + if header_name is None: + self._headerlen = 0 + else: + # Take the separating colon and space into account. + self._headerlen = len(header_name) + 2 + + def __str__(self): + """Return the string value of the header.""" + self._normalize() + uchunks = [] + lastcs = None + lastspace = None + for string, charset in self._chunks: + # We must preserve spaces between encoded and non-encoded word + # boundaries, which means for us we need to add a space when we go + # from a charset to None/us-ascii, or from None/us-ascii to a + # charset. Only do this for the second and subsequent chunks. + # Don't add a space if the None/us-ascii string already has + # a space (trailing or leading depending on transition) + nextcs = charset + if nextcs == _charset.UNKNOWN8BIT: + original_bytes = string.encode('ascii', 'surrogateescape') + string = original_bytes.decode('ascii', 'replace') + if uchunks: + hasspace = string and self._nonctext(string[0]) + if lastcs not in (None, 'us-ascii'): + if nextcs in (None, 'us-ascii') and not hasspace: + uchunks.append(SPACE) + nextcs = None + elif nextcs not in (None, 'us-ascii') and not lastspace: + uchunks.append(SPACE) + lastspace = string and self._nonctext(string[-1]) + lastcs = nextcs + uchunks.append(string) + return EMPTYSTRING.join(uchunks) + + # Rich comparison operators for equality only. BAW: does it make sense to + # have or explicitly disable <, <=, >, >= operators? + def __eq__(self, other): + # other may be a Header or a string. Both are fine so coerce + # ourselves to a unicode (of the unencoded header value), swap the + # args and do another comparison. 
+ return other == str(self) + + def __ne__(self, other): + return not self == other + + def append(self, s, charset=None, errors='strict'): + """Append a string to the MIME header. + + Optional charset, if given, should be a Charset instance or the name + of a character set (which will be converted to a Charset instance). A + value of None (the default) means that the charset given in the + constructor is used. + + s may be a byte string or a Unicode string. If it is a byte string + (i.e. isinstance(s, str) is false), then charset is the encoding of + that byte string, and a UnicodeError will be raised if the string + cannot be decoded with that charset. If s is a Unicode string, then + charset is a hint specifying the character set of the characters in + the string. In either case, when producing an RFC 2822 compliant + header using RFC 2047 rules, the string will be encoded using the + output codec of the charset. If the string cannot be encoded to the + output codec, a UnicodeError will be raised. + + Optional `errors' is passed as the errors argument to the decode + call if s is a byte string. + """ + if charset is None: + charset = self._charset + elif not isinstance(charset, Charset): + charset = Charset(charset) + if not isinstance(s, str): + input_charset = charset.input_codec or 'us-ascii' + if input_charset == _charset.UNKNOWN8BIT: + s = s.decode('us-ascii', 'surrogateescape') + else: + s = s.decode(input_charset, errors) + # Ensure that the bytes we're storing can be decoded to the output + # character set, otherwise an early error is raised. + output_charset = charset.output_codec or 'us-ascii' + if output_charset != _charset.UNKNOWN8BIT: + try: + s.encode(output_charset, errors) + except UnicodeEncodeError: + if output_charset!='us-ascii': + raise + charset = UTF8 + self._chunks.append((s, charset)) + + def _nonctext(self, s): + """True if string s is not a ctext character of RFC822. 
+ """ + return s.isspace() or s in ('(', ')', '\\') + + def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'): + r"""Encode a message header into an RFC-compliant format. + + There are many issues involved in converting a given string for use in + an email header. Only certain character sets are readable in most + email clients, and as header strings can only contain a subset of + 7-bit ASCII, care must be taken to properly convert and encode (with + Base64 or quoted-printable) header strings. In addition, there is a + 75-character length limit on any given encoded header field, so + line-wrapping must be performed, even with double-byte character sets. + + Optional maxlinelen specifies the maximum length of each generated + line, exclusive of the linesep string. Individual lines may be longer + than maxlinelen if a folding point cannot be found. The first line + will be shorter by the length of the header name plus ": " if a header + name was specified at Header construction time. The default value for + maxlinelen is determined at header construction time. + + Optional splitchars is a string containing characters which should be + given extra weight by the splitting algorithm during normal header + wrapping. This is in very rough support of RFC 2822's `higher level + syntactic breaks': split points preceded by a splitchar are preferred + during line splitting, with the characters preferred in the order in + which they appear in the string. Space and tab may be included in the + string to indicate whether preference should be given to one over the + other as a split point when other split chars do not appear in the line + being split. Splitchars does not affect RFC 2047 encoded lines. + + Optional linesep is a string to be used to separate the lines of + the value. The default value is the most useful for typical + Python applications, but it can be set to \r\n to produce RFC-compliant + line separators when needed. 
+ """ + self._normalize() + if maxlinelen is None: + maxlinelen = self._maxlinelen + # A maxlinelen of 0 means don't wrap. For all practical purposes, + # choosing a huge number here accomplishes that and makes the + # _ValueFormatter algorithm much simpler. + if maxlinelen == 0: + maxlinelen = 1000000 + formatter = _ValueFormatter(self._headerlen, maxlinelen, + self._continuation_ws, splitchars) + lastcs = None + hasspace = lastspace = None + for string, charset in self._chunks: + if hasspace is not None: + hasspace = string and self._nonctext(string[0]) + import sys + if lastcs not in (None, 'us-ascii'): + if not hasspace or charset not in (None, 'us-ascii'): + formatter.add_transition() + elif charset not in (None, 'us-ascii') and not lastspace: + formatter.add_transition() + lastspace = string and self._nonctext(string[-1]) + lastcs = charset + hasspace = False + lines = string.splitlines() + if lines: + formatter.feed('', lines[0], charset) + else: + formatter.feed('', '', charset) + for line in lines[1:]: + formatter.newline() + if charset.header_encoding is not None: + formatter.feed(self._continuation_ws, ' ' + line.lstrip(), + charset) + else: + sline = line.lstrip() + fws = line[:len(line)-len(sline)] + formatter.feed(fws, sline, charset) + if len(lines) > 1: + formatter.newline() + if self._chunks: + formatter.add_transition() + value = formatter._str(linesep) + if _embeded_header.search(value): + raise HeaderParseError("header value appears to contain " + "an embedded header: {!r}".format(value)) + return value + + def _normalize(self): + # Step 1: Normalize the chunks so that all runs of identical charsets + # get collapsed into a single unicode string. 
+ chunks = [] + last_charset = None + last_chunk = [] + for string, charset in self._chunks: + if charset == last_charset: + last_chunk.append(string) + else: + if last_charset is not None: + chunks.append((SPACE.join(last_chunk), last_charset)) + last_chunk = [string] + last_charset = charset + if last_chunk: + chunks.append((SPACE.join(last_chunk), last_charset)) + self._chunks = chunks + + +class _ValueFormatter(object): + def __init__(self, headerlen, maxlen, continuation_ws, splitchars): + self._maxlen = maxlen + self._continuation_ws = continuation_ws + self._continuation_ws_len = len(continuation_ws) + self._splitchars = splitchars + self._lines = [] + self._current_line = _Accumulator(headerlen) + + def _str(self, linesep): + self.newline() + return linesep.join(self._lines) + + def __str__(self): + return self._str(NL) + + def newline(self): + end_of_line = self._current_line.pop() + if end_of_line != (' ', ''): + self._current_line.push(*end_of_line) + if len(self._current_line) > 0: + if self._current_line.is_onlyws(): + self._lines[-1] += str(self._current_line) + else: + self._lines.append(str(self._current_line)) + self._current_line.reset() + + def add_transition(self): + self._current_line.push(' ', '') + + def feed(self, fws, string, charset): + # If the charset has no header encoding (i.e. it is an ASCII encoding) + # then we must split the header at the "highest level syntactic break" + # possible. Note that we don't have a lot of smarts about field + # syntax; we just try to break on semi-colons, then commas, then + # whitespace. Eventually, this should be pluggable. + if charset.header_encoding is None: + self._ascii_split(fws, string, self._splitchars) + return + # Otherwise, we're doing either a Base64 or a quoted-printable + # encoding which means we don't need to split the line on syntactic + # breaks. We can basically just find enough characters to fit on the + # current line, minus the RFC 2047 chrome. 
What makes this trickier + # though is that we have to split at octet boundaries, not character + # boundaries but it's only safe to split at character boundaries so at + # best we can only get close. + encoded_lines = charset.header_encode_lines(string, self._maxlengths()) + # The first element extends the current line, but if it's None then + # nothing more fit on the current line so start a new line. + try: + first_line = encoded_lines.pop(0) + except IndexError: + # There are no encoded lines, so we're done. + return + if first_line is not None: + self._append_chunk(fws, first_line) + try: + last_line = encoded_lines.pop() + except IndexError: + # There was only one line. + return + self.newline() + self._current_line.push(self._continuation_ws, last_line) + # Everything else are full lines in themselves. + for line in encoded_lines: + self._lines.append(self._continuation_ws + line) + + def _maxlengths(self): + # The first line's length. + yield self._maxlen - len(self._current_line) + while True: + yield self._maxlen - self._continuation_ws_len + + def _ascii_split(self, fws, string, splitchars): + # The RFC 2822 header folding algorithm is simple in principle but + # complex in practice. Lines may be folded any place where "folding + # white space" appears by inserting a linesep character in front of the + # FWS. The complication is that not all spaces or tabs qualify as FWS, + # and we are also supposed to prefer to break at "higher level + # syntactic breaks". We can't do either of these without intimate + # knowledge of the structure of structured headers, which we don't have + # here. So the best we can do here is prefer to break at the specified + # splitchars, and hope that we don't choose any spaces or tabs that + # aren't legal FWS. (This is at least better than the old algorithm, + # where we would sometimes *introduce* FWS after a splitchar, or the + # algorithm before that, where we would turn all white space runs into + # single spaces or tabs.) 
+ parts = re.split("(["+FWS+"]+)", fws+string) + if parts[0]: + parts[:0] = [''] + else: + parts.pop(0) + for fws, part in zip(*[iter(parts)]*2): + self._append_chunk(fws, part) + + def _append_chunk(self, fws, string): + self._current_line.push(fws, string) + if len(self._current_line) > self._maxlen: + # Find the best split point, working backward from the end. + # There might be none, on a long first line. + for ch in self._splitchars: + for i in range(self._current_line.part_count()-1, 0, -1): + if ch.isspace(): + fws = self._current_line[i][0] + if fws and fws[0]==ch: + break + prevpart = self._current_line[i-1][1] + if prevpart and prevpart[-1]==ch: + break + else: + continue + break + else: + fws, part = self._current_line.pop() + if self._current_line._initial_size > 0: + # There will be a header, so leave it on a line by itself. + self.newline() + if not fws: + # We don't use continuation_ws here because the whitespace + # after a header should always be a space. + fws = ' ' + self._current_line.push(fws, part) + return + remainder = self._current_line.pop_from(i) + self._lines.append(str(self._current_line)) + self._current_line.reset(remainder) + + +class _Accumulator(list): + + def __init__(self, initial_size=0): + self._initial_size = initial_size + super().__init__() + + def push(self, fws, string): + self.append((fws, string)) + + def pop_from(self, i=0): + popped = self[i:] + self[i:] = [] + return popped + + def pop(self): + if self.part_count()==0: + return ('', '') + return super().pop() + + def __len__(self): + return sum((len(fws)+len(part) for fws, part in self), + self._initial_size) + + def __str__(self): + return EMPTYSTRING.join((EMPTYSTRING.join((fws, part)) + for fws, part in self)) + + def reset(self, startval=None): + if startval is None: + startval = [] + self[:] = startval + self._initial_size = 0 + + def is_onlyws(self): + return self._initial_size==0 and (not self or str(self).isspace()) + + def part_count(self): + return 
super().__len__() diff --git a/src/future/backports/email/headerregistry.py b/src/future/backports/email/headerregistry.py new file mode 100644 index 00000000..9aaad65a --- /dev/null +++ b/src/future/backports/email/headerregistry.py @@ -0,0 +1,592 @@ +"""Representing and manipulating email headers via custom objects. + +This module provides an implementation of the HeaderRegistry API. +The implementation is designed to flexibly follow RFC5322 rules. + +Eventually HeaderRegistry will be a public API, but it isn't yet, +and will probably change some before that happens. + +""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import + +from future.builtins import super +from future.builtins import str +from future.utils import text_to_native_str +from future.backports.email import utils +from future.backports.email import errors +from future.backports.email import _header_value_parser as parser + +class Address(object): + + def __init__(self, display_name='', username='', domain='', addr_spec=None): + """Create an object represeting a full email address. + + An address can have a 'display_name', a 'username', and a 'domain'. In + addition to specifying the username and domain separately, they may be + specified together by using the addr_spec keyword *instead of* the + username and domain keywords. If an addr_spec string is specified it + must be properly quoted according to RFC 5322 rules; an error will be + raised if it is not. + + An Address object has display_name, username, domain, and addr_spec + attributes, all of which are read-only. The addr_spec and the string + value of the object are both quoted according to RFC5322 rules, but + without any Content Transfer Encoding. + + """ + # This clause with its potential 'raise' may only happen when an + # application program creates an Address object using an addr_spec + # keyword. The email library code itself must always supply username + # and domain. 
+ if addr_spec is not None: + if username or domain: + raise TypeError("addrspec specified when username and/or " + "domain also specified") + a_s, rest = parser.get_addr_spec(addr_spec) + if rest: + raise ValueError("Invalid addr_spec; only '{}' " + "could be parsed from '{}'".format( + a_s, addr_spec)) + if a_s.all_defects: + raise a_s.all_defects[0] + username = a_s.local_part + domain = a_s.domain + self._display_name = display_name + self._username = username + self._domain = domain + + @property + def display_name(self): + return self._display_name + + @property + def username(self): + return self._username + + @property + def domain(self): + return self._domain + + @property + def addr_spec(self): + """The addr_spec (username@domain) portion of the address, quoted + according to RFC 5322 rules, but with no Content Transfer Encoding. + """ + nameset = set(self.username) + if len(nameset) > len(nameset-parser.DOT_ATOM_ENDS): + lp = parser.quote_string(self.username) + else: + lp = self.username + if self.domain: + return lp + '@' + self.domain + if not lp: + return '<>' + return lp + + def __repr__(self): + return "Address(display_name={!r}, username={!r}, domain={!r})".format( + self.display_name, self.username, self.domain) + + def __str__(self): + nameset = set(self.display_name) + if len(nameset) > len(nameset-parser.SPECIALS): + disp = parser.quote_string(self.display_name) + else: + disp = self.display_name + if disp: + addr_spec = '' if self.addr_spec=='<>' else self.addr_spec + return "{} <{}>".format(disp, addr_spec) + return self.addr_spec + + def __eq__(self, other): + if type(other) != type(self): + return False + return (self.display_name == other.display_name and + self.username == other.username and + self.domain == other.domain) + + +class Group(object): + + def __init__(self, display_name=None, addresses=None): + """Create an object representing an address group. 
+ + An address group consists of a display_name followed by colon and an + list of addresses (see Address) terminated by a semi-colon. The Group + is created by specifying a display_name and a possibly empty list of + Address objects. A Group can also be used to represent a single + address that is not in a group, which is convenient when manipulating + lists that are a combination of Groups and individual Addresses. In + this case the display_name should be set to None. In particular, the + string representation of a Group whose display_name is None is the same + as the Address object, if there is one and only one Address object in + the addresses list. + + """ + self._display_name = display_name + self._addresses = tuple(addresses) if addresses else tuple() + + @property + def display_name(self): + return self._display_name + + @property + def addresses(self): + return self._addresses + + def __repr__(self): + return "Group(display_name={!r}, addresses={!r}".format( + self.display_name, self.addresses) + + def __str__(self): + if self.display_name is None and len(self.addresses)==1: + return str(self.addresses[0]) + disp = self.display_name + if disp is not None: + nameset = set(disp) + if len(nameset) > len(nameset-parser.SPECIALS): + disp = parser.quote_string(disp) + adrstr = ", ".join(str(x) for x in self.addresses) + adrstr = ' ' + adrstr if adrstr else adrstr + return "{}:{};".format(disp, adrstr) + + def __eq__(self, other): + if type(other) != type(self): + return False + return (self.display_name == other.display_name and + self.addresses == other.addresses) + + +# Header Classes # + +class BaseHeader(str): + + """Base class for message headers. + + Implements generic behavior and provides tools for subclasses. + + A subclass must define a classmethod named 'parse' that takes an unfolded + value string and a dictionary as its arguments. The dictionary will + contain one key, 'defects', initialized to an empty list. 
After the call + the dictionary must contain two additional keys: parse_tree, set to the + parse tree obtained from parsing the header, and 'decoded', set to the + string value of the idealized representation of the data from the value. + (That is, encoded words are decoded, and values that have canonical + representations are so represented.) + + The defects key is intended to collect parsing defects, which the message + parser will subsequently dispose of as appropriate. The parser should not, + insofar as practical, raise any errors. Defects should be added to the + list instead. The standard header parsers register defects for RFC + compliance issues, for obsolete RFC syntax, and for unrecoverable parsing + errors. + + The parse method may add additional keys to the dictionary. In this case + the subclass must define an 'init' method, which will be passed the + dictionary as its keyword arguments. The method should use (usually by + setting them as the value of similarly named attributes) and remove all the + extra keys added by its parse method, and then use super to call its parent + class with the remaining arguments and keywords. + + The subclass should also make sure that a 'max_count' attribute is defined + that is either None or 1. XXX: need to better define this API. 
+ + """ + + def __new__(cls, name, value): + kwds = {'defects': []} + cls.parse(value, kwds) + if utils._has_surrogates(kwds['decoded']): + kwds['decoded'] = utils._sanitize(kwds['decoded']) + self = str.__new__(cls, kwds['decoded']) + # del kwds['decoded'] + self.init(name, **kwds) + return self + + def init(self, name, **_3to2kwargs): + defects = _3to2kwargs['defects']; del _3to2kwargs['defects'] + parse_tree = _3to2kwargs['parse_tree']; del _3to2kwargs['parse_tree'] + self._name = name + self._parse_tree = parse_tree + self._defects = defects + + @property + def name(self): + return self._name + + @property + def defects(self): + return tuple(self._defects) + + def __reduce__(self): + return ( + _reconstruct_header, + ( + self.__class__.__name__, + self.__class__.__bases__, + str(self), + ), + self.__dict__) + + @classmethod + def _reconstruct(cls, value): + return str.__new__(cls, value) + + def fold(self, **_3to2kwargs): + policy = _3to2kwargs['policy']; del _3to2kwargs['policy'] + """Fold header according to policy. + + The parsed representation of the header is folded according to + RFC5322 rules, as modified by the policy. If the parse tree + contains surrogateescaped bytes, the bytes are CTE encoded using + the charset 'unknown-8bit". + + Any non-ASCII characters in the parse tree are CTE encoded using + charset utf-8. XXX: make this a policy setting. + + The returned value is an ASCII-only string possibly containing linesep + characters, and ending with a linesep character. The string includes + the header name and the ': ' separator. + + """ + # At some point we need to only put fws here if it was in the source. 
+ header = parser.Header([ + parser.HeaderLabel([ + parser.ValueTerminal(self.name, 'header-name'), + parser.ValueTerminal(':', 'header-sep')]), + parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]), + self._parse_tree]) + return header.fold(policy=policy) + + +def _reconstruct_header(cls_name, bases, value): + return type(text_to_native_str(cls_name), bases, {})._reconstruct(value) + + +class UnstructuredHeader(object): + + max_count = None + value_parser = staticmethod(parser.get_unstructured) + + @classmethod + def parse(cls, value, kwds): + kwds['parse_tree'] = cls.value_parser(value) + kwds['decoded'] = str(kwds['parse_tree']) + + +class UniqueUnstructuredHeader(UnstructuredHeader): + + max_count = 1 + + +class DateHeader(object): + + """Header whose value consists of a single timestamp. + + Provides an additional attribute, datetime, which is either an aware + datetime using a timezone, or a naive datetime if the timezone + in the input string is -0000. Also accepts a datetime as input. + The 'value' attribute is the normalized form of the timestamp, + which means it is the output of format_datetime on the datetime. + """ + + max_count = None + + # This is used only for folding, not for creating 'decoded'. 
+ value_parser = staticmethod(parser.get_unstructured) + + @classmethod + def parse(cls, value, kwds): + if not value: + kwds['defects'].append(errors.HeaderMissingRequiredValue()) + kwds['datetime'] = None + kwds['decoded'] = '' + kwds['parse_tree'] = parser.TokenList() + return + if isinstance(value, str): + value = utils.parsedate_to_datetime(value) + kwds['datetime'] = value + kwds['decoded'] = utils.format_datetime(kwds['datetime']) + kwds['parse_tree'] = cls.value_parser(kwds['decoded']) + + def init(self, *args, **kw): + self._datetime = kw.pop('datetime') + super().init(*args, **kw) + + @property + def datetime(self): + return self._datetime + + +class UniqueDateHeader(DateHeader): + + max_count = 1 + + +class AddressHeader(object): + + max_count = None + + @staticmethod + def value_parser(value): + address_list, value = parser.get_address_list(value) + assert not value, 'this should not happen' + return address_list + + @classmethod + def parse(cls, value, kwds): + if isinstance(value, str): + # We are translating here from the RFC language (address/mailbox) + # to our API language (group/address). 
+ kwds['parse_tree'] = address_list = cls.value_parser(value) + groups = [] + for addr in address_list.addresses: + groups.append(Group(addr.display_name, + [Address(mb.display_name or '', + mb.local_part or '', + mb.domain or '') + for mb in addr.all_mailboxes])) + defects = list(address_list.all_defects) + else: + # Assume it is Address/Group stuff + if not hasattr(value, '__iter__'): + value = [value] + groups = [Group(None, [item]) if not hasattr(item, 'addresses') + else item + for item in value] + defects = [] + kwds['groups'] = groups + kwds['defects'] = defects + kwds['decoded'] = ', '.join([str(item) for item in groups]) + if 'parse_tree' not in kwds: + kwds['parse_tree'] = cls.value_parser(kwds['decoded']) + + def init(self, *args, **kw): + self._groups = tuple(kw.pop('groups')) + self._addresses = None + super().init(*args, **kw) + + @property + def groups(self): + return self._groups + + @property + def addresses(self): + if self._addresses is None: + self._addresses = tuple([address for group in self._groups + for address in group.addresses]) + return self._addresses + + +class UniqueAddressHeader(AddressHeader): + + max_count = 1 + + +class SingleAddressHeader(AddressHeader): + + @property + def address(self): + if len(self.addresses)!=1: + raise ValueError(("value of single address header {} is not " + "a single address").format(self.name)) + return self.addresses[0] + + +class UniqueSingleAddressHeader(SingleAddressHeader): + + max_count = 1 + + +class MIMEVersionHeader(object): + + max_count = 1 + + value_parser = staticmethod(parser.parse_mime_version) + + @classmethod + def parse(cls, value, kwds): + kwds['parse_tree'] = parse_tree = cls.value_parser(value) + kwds['decoded'] = str(parse_tree) + kwds['defects'].extend(parse_tree.all_defects) + kwds['major'] = None if parse_tree.minor is None else parse_tree.major + kwds['minor'] = parse_tree.minor + if parse_tree.minor is not None: + kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor']) + 
else: + kwds['version'] = None + + def init(self, *args, **kw): + self._version = kw.pop('version') + self._major = kw.pop('major') + self._minor = kw.pop('minor') + super().init(*args, **kw) + + @property + def major(self): + return self._major + + @property + def minor(self): + return self._minor + + @property + def version(self): + return self._version + + +class ParameterizedMIMEHeader(object): + + # Mixin that handles the params dict. Must be subclassed and + # a property value_parser for the specific header provided. + + max_count = 1 + + @classmethod + def parse(cls, value, kwds): + kwds['parse_tree'] = parse_tree = cls.value_parser(value) + kwds['decoded'] = str(parse_tree) + kwds['defects'].extend(parse_tree.all_defects) + if parse_tree.params is None: + kwds['params'] = {} + else: + # The MIME RFCs specify that parameter ordering is arbitrary. + kwds['params'] = dict((utils._sanitize(name).lower(), + utils._sanitize(value)) + for name, value in parse_tree.params) + + def init(self, *args, **kw): + self._params = kw.pop('params') + super().init(*args, **kw) + + @property + def params(self): + return self._params.copy() + + +class ContentTypeHeader(ParameterizedMIMEHeader): + + value_parser = staticmethod(parser.parse_content_type_header) + + def init(self, *args, **kw): + super().init(*args, **kw) + self._maintype = utils._sanitize(self._parse_tree.maintype) + self._subtype = utils._sanitize(self._parse_tree.subtype) + + @property + def maintype(self): + return self._maintype + + @property + def subtype(self): + return self._subtype + + @property + def content_type(self): + return self.maintype + '/' + self.subtype + + +class ContentDispositionHeader(ParameterizedMIMEHeader): + + value_parser = staticmethod(parser.parse_content_disposition_header) + + def init(self, *args, **kw): + super().init(*args, **kw) + cd = self._parse_tree.content_disposition + self._content_disposition = cd if cd is None else utils._sanitize(cd) + + @property + def 
content_disposition(self): + return self._content_disposition + + +class ContentTransferEncodingHeader(object): + + max_count = 1 + + value_parser = staticmethod(parser.parse_content_transfer_encoding_header) + + @classmethod + def parse(cls, value, kwds): + kwds['parse_tree'] = parse_tree = cls.value_parser(value) + kwds['decoded'] = str(parse_tree) + kwds['defects'].extend(parse_tree.all_defects) + + def init(self, *args, **kw): + super().init(*args, **kw) + self._cte = utils._sanitize(self._parse_tree.cte) + + @property + def cte(self): + return self._cte + + +# The header factory # + +_default_header_map = { + 'subject': UniqueUnstructuredHeader, + 'date': UniqueDateHeader, + 'resent-date': DateHeader, + 'orig-date': UniqueDateHeader, + 'sender': UniqueSingleAddressHeader, + 'resent-sender': SingleAddressHeader, + 'to': UniqueAddressHeader, + 'resent-to': AddressHeader, + 'cc': UniqueAddressHeader, + 'resent-cc': AddressHeader, + 'bcc': UniqueAddressHeader, + 'resent-bcc': AddressHeader, + 'from': UniqueAddressHeader, + 'resent-from': AddressHeader, + 'reply-to': UniqueAddressHeader, + 'mime-version': MIMEVersionHeader, + 'content-type': ContentTypeHeader, + 'content-disposition': ContentDispositionHeader, + 'content-transfer-encoding': ContentTransferEncodingHeader, + } + +class HeaderRegistry(object): + + """A header_factory and header registry.""" + + def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader, + use_default_map=True): + """Create a header_factory that works with the Policy API. + + base_class is the class that will be the last class in the created + header class's __bases__ list. default_class is the class that will be + used if "name" (see __call__) does not appear in the registry. + use_default_map controls whether or not the default mapping of names to + specialized classes is copied in to the registry when the factory is + created. The default is True. 
+ + """ + self.registry = {} + self.base_class = base_class + self.default_class = default_class + if use_default_map: + self.registry.update(_default_header_map) + + def map_to_type(self, name, cls): + """Register cls as the specialized class for handling "name" headers. + + """ + self.registry[name.lower()] = cls + + def __getitem__(self, name): + cls = self.registry.get(name.lower(), self.default_class) + return type(text_to_native_str('_'+cls.__name__), (cls, self.base_class), {}) + + def __call__(self, name, value): + """Create a header instance for header 'name' from 'value'. + + Creates a header instance by creating a specialized class for parsing + and representing the specified header by combining the factory + base_class with a specialized class from the registry or the + default_class, and passing the name and value to the constructed + class's constructor. + + """ + return self[name](name, value) diff --git a/src/future/backports/email/iterators.py b/src/future/backports/email/iterators.py new file mode 100644 index 00000000..82d320f8 --- /dev/null +++ b/src/future/backports/email/iterators.py @@ -0,0 +1,74 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Various types of useful iterators and generators.""" +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import + +__all__ = [ + 'body_line_iterator', + 'typed_subpart_iterator', + 'walk', + # Do not include _structure() since it's part of the debugging API. + ] + +import sys +from io import StringIO + + +# This function will become a method of the Message class +def walk(self): + """Walk over the message tree, yielding each subpart. + + The walk is performed in depth-first order. This method is a + generator. 
+ """ + yield self + if self.is_multipart(): + for subpart in self.get_payload(): + for subsubpart in subpart.walk(): + yield subsubpart + + +# These two functions are imported into the Iterators.py interface module. +def body_line_iterator(msg, decode=False): + """Iterate over the parts, returning string payloads line-by-line. + + Optional decode (default False) is passed through to .get_payload(). + """ + for subpart in msg.walk(): + payload = subpart.get_payload(decode=decode) + if isinstance(payload, str): + for line in StringIO(payload): + yield line + + +def typed_subpart_iterator(msg, maintype='text', subtype=None): + """Iterate over the subparts with a given MIME type. + + Use `maintype' as the main MIME type to match against; this defaults to + "text". Optional `subtype' is the MIME subtype to match against; if + omitted, only the main type is matched. + """ + for subpart in msg.walk(): + if subpart.get_content_maintype() == maintype: + if subtype is None or subpart.get_content_subtype() == subtype: + yield subpart + + +def _structure(msg, fp=None, level=0, include_default=False): + """A handy debugging aid""" + if fp is None: + fp = sys.stdout + tab = ' ' * (level * 4) + print(tab + msg.get_content_type(), end='', file=fp) + if include_default: + print(' [%s]' % msg.get_default_type(), file=fp) + else: + print(file=fp) + if msg.is_multipart(): + for subpart in msg.get_payload(): + _structure(subpart, fp, level+1, include_default) diff --git a/src/future/backports/email/message.py b/src/future/backports/email/message.py new file mode 100644 index 00000000..d8d9615d --- /dev/null +++ b/src/future/backports/email/message.py @@ -0,0 +1,882 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2001-2007 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Basic message object for the email package object model.""" +from __future__ import absolute_import, division, unicode_literals +from future.builtins import list, range, str, zip + 
+__all__ = ['Message'] + +import re +import uu +import base64 +import binascii +from io import BytesIO, StringIO + +# Intrapackage imports +from future.utils import as_native_str +from future.backports.email import utils +from future.backports.email import errors +from future.backports.email._policybase import compat32 +from future.backports.email import charset as _charset +from future.backports.email._encoded_words import decode_b +Charset = _charset.Charset + +SEMISPACE = '; ' + +# Regular expression that matches `special' characters in parameters, the +# existence of which force quoting of the parameter value. +tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') + + +def _splitparam(param): + # Split header parameters. BAW: this may be too simple. It isn't + # strictly RFC 2045 (section 5.1) compliant, but it catches most headers + # found in the wild. We may eventually need a full fledged parser. + # RDM: we might have a Header here; for now just stringify it. + a, sep, b = str(param).partition(';') + if not sep: + return a.strip(), None + return a.strip(), b.strip() + +def _formatparam(param, value=None, quote=True): + """Convenience function to format and return a key=value pair. + + This will quote the value if needed or if quote is true. If value is a + three tuple (charset, language, value), it will be encoded according + to RFC2231 rules. If it contains non-ascii characters it will likewise + be encoded according to RFC2231 rules, using the utf-8 charset and + a null language. + """ + if value is not None and len(value) > 0: + # A tuple is used for RFC 2231 encoded parameter values where items + # are (charset, language, value). charset is a string, not a Charset + # instance. RFC 2231 encoded values are never quoted, per RFC. 
+ if isinstance(value, tuple): + # Encode as per RFC 2231 + param += '*' + value = utils.encode_rfc2231(value[2], value[0], value[1]) + return '%s=%s' % (param, value) + else: + try: + value.encode('ascii') + except UnicodeEncodeError: + param += '*' + value = utils.encode_rfc2231(value, 'utf-8', '') + return '%s=%s' % (param, value) + # BAW: Please check this. I think that if quote is set it should + # force quoting even if not necessary. + if quote or tspecials.search(value): + return '%s="%s"' % (param, utils.quote(value)) + else: + return '%s=%s' % (param, value) + else: + return param + +def _parseparam(s): + # RDM This might be a Header, so for now stringify it. + s = ';' + str(s) + plist = [] + while s[:1] == ';': + s = s[1:] + end = s.find(';') + while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: + end = s.find(';', end + 1) + if end < 0: + end = len(s) + f = s[:end] + if '=' in f: + i = f.index('=') + f = f[:i].strip().lower() + '=' + f[i+1:].strip() + plist.append(f.strip()) + s = s[end:] + return plist + + +def _unquotevalue(value): + # This is different than utils.collapse_rfc2231_value() because it doesn't + # try to convert the value to a unicode. Message.get_param() and + # Message.get_params() are both currently defined to return the tuple in + # the face of RFC 2231 parameters. + if isinstance(value, tuple): + return value[0], value[1], utils.unquote(value[2]) + else: + return utils.unquote(value) + + +class Message(object): + """Basic message object. + + A message object is defined as something that has a bunch of RFC 2822 + headers and a payload. It may optionally have an envelope header + (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a + multipart or a message/rfc822), then the payload is a list of Message + objects, otherwise it is a string. + + Message objects implement part of the `mapping' interface, which assumes + there is exactly one occurrence of the header per message. 
Some headers + do in fact appear multiple times (e.g. Received) and for those headers, + you must use the explicit API to set or get all the headers. Not all of + the mapping methods are implemented. + """ + def __init__(self, policy=compat32): + self.policy = policy + self._headers = list() + self._unixfrom = None + self._payload = None + self._charset = None + # Defaults for multipart messages + self.preamble = self.epilogue = None + self.defects = [] + # Default content type + self._default_type = 'text/plain' + + @as_native_str(encoding='utf-8') + def __str__(self): + """Return the entire formatted message as a string. + This includes the headers, body, and envelope header. + """ + return self.as_string() + + def as_string(self, unixfrom=False, maxheaderlen=0): + """Return the entire formatted message as a (unicode) string. + Optional `unixfrom' when True, means include the Unix From_ envelope + header. + + This is a convenience method and may not generate the message exactly + as you intend. For more flexibility, use the flatten() method of a + Generator instance. + """ + from future.backports.email.generator import Generator + fp = StringIO() + g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen) + g.flatten(self, unixfrom=unixfrom) + return fp.getvalue() + + def is_multipart(self): + """Return True if the message consists of multiple parts.""" + return isinstance(self._payload, list) + + # + # Unix From_ line + # + def set_unixfrom(self, unixfrom): + self._unixfrom = unixfrom + + def get_unixfrom(self): + return self._unixfrom + + # + # Payload manipulation. + # + def attach(self, payload): + """Add the given payload to the current payload. + + The current payload will always be a list of objects after this method + is called. If you want to set the payload to a scalar object, use + set_payload() instead. 
+ """ + if self._payload is None: + self._payload = [payload] + else: + self._payload.append(payload) + + def get_payload(self, i=None, decode=False): + """Return a reference to the payload. + + The payload will either be a list object or a string. If you mutate + the list object, you modify the message's payload in place. Optional + i returns that index into the payload. + + Optional decode is a flag indicating whether the payload should be + decoded or not, according to the Content-Transfer-Encoding header + (default is False). + + When True and the message is not a multipart, the payload will be + decoded if this header's value is `quoted-printable' or `base64'. If + some other encoding is used, or the header is missing, or if the + payload has bogus data (i.e. bogus base64 or uuencoded data), the + payload is returned as-is. + + If the message is a multipart and the decode flag is True, then None + is returned. + """ + # Here is the logic table for this code, based on the email5.0.0 code: + # i decode is_multipart result + # ------ ------ ------------ ------------------------------ + # None True True None + # i True True None + # None False True _payload (a list) + # i False True _payload element i (a Message) + # i False False error (not a list) + # i True False error (not a list) + # None False False _payload + # None True False _payload decoded (bytes) + # Note that Barry planned to factor out the 'decode' case, but that + # isn't so easy now that we handle the 8 bit data, which needs to be + # converted in both the decode and non-decode path. + if self.is_multipart(): + if decode: + return None + if i is None: + return self._payload + else: + return self._payload[i] + # For backward compatibility, Use isinstance and this error message + # instead of the more logical is_multipart test. 
+ if i is not None and not isinstance(self._payload, list): + raise TypeError('Expected list, got %s' % type(self._payload)) + payload = self._payload + # cte might be a Header, so for now stringify it. + cte = str(self.get('content-transfer-encoding', '')).lower() + # payload may be bytes here. + if isinstance(payload, str): + payload = str(payload) # for Python-Future, so surrogateescape works + if utils._has_surrogates(payload): + bpayload = payload.encode('ascii', 'surrogateescape') + if not decode: + try: + payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace') + except LookupError: + payload = bpayload.decode('ascii', 'replace') + elif decode: + try: + bpayload = payload.encode('ascii') + except UnicodeError: + # This won't happen for RFC compliant messages (messages + # containing only ASCII codepoints in the unicode input). + # If it does happen, turn the string into bytes in a way + # guaranteed not to fail. + bpayload = payload.encode('raw-unicode-escape') + if not decode: + return payload + if cte == 'quoted-printable': + return utils._qdecode(bpayload) + elif cte == 'base64': + # XXX: this is a bit of a hack; decode_b should probably be factored + # out somewhere, but I haven't figured out where yet. + value, defects = decode_b(b''.join(bpayload.splitlines())) + for defect in defects: + self.policy.handle_defect(self, defect) + return value + elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): + in_file = BytesIO(bpayload) + out_file = BytesIO() + try: + uu.decode(in_file, out_file, quiet=True) + return out_file.getvalue() + except uu.Error: + # Some decoding problem + return bpayload + if isinstance(payload, str): + return bpayload + return payload + + def set_payload(self, payload, charset=None): + """Set the payload to the given value. + + Optional charset sets the message's default character set. See + set_charset() for details. 
+ """ + self._payload = payload + if charset is not None: + self.set_charset(charset) + + def set_charset(self, charset): + """Set the charset of the payload to a given character set. + + charset can be a Charset instance, a string naming a character set, or + None. If it is a string it will be converted to a Charset instance. + If charset is None, the charset parameter will be removed from the + Content-Type field. Anything else will generate a TypeError. + + The message will be assumed to be of type text/* encoded with + charset.input_charset. It will be converted to charset.output_charset + and encoded properly, if needed, when generating the plain text + representation of the message. MIME headers (MIME-Version, + Content-Type, Content-Transfer-Encoding) will be added as needed. + """ + if charset is None: + self.del_param('charset') + self._charset = None + return + if not isinstance(charset, Charset): + charset = Charset(charset) + self._charset = charset + if 'MIME-Version' not in self: + self.add_header('MIME-Version', '1.0') + if 'Content-Type' not in self: + self.add_header('Content-Type', 'text/plain', + charset=charset.get_output_charset()) + else: + self.set_param('charset', charset.get_output_charset()) + if charset != charset.get_output_charset(): + self._payload = charset.body_encode(self._payload) + if 'Content-Transfer-Encoding' not in self: + cte = charset.get_body_encoding() + try: + cte(self) + except TypeError: + self._payload = charset.body_encode(self._payload) + self.add_header('Content-Transfer-Encoding', cte) + + def get_charset(self): + """Return the Charset instance associated with the message's payload. + """ + return self._charset + + # + # MAPPING INTERFACE (partial) + # + def __len__(self): + """Return the total number of headers, including duplicates.""" + return len(self._headers) + + def __getitem__(self, name): + """Get a header value. + + Return None if the header is missing instead of raising an exception. 
+ + Note that if the header appeared multiple times, exactly which + occurrence gets returned is undefined. Use get_all() to get all + the values matching a header field name. + """ + return self.get(name) + + def __setitem__(self, name, val): + """Set the value of a header. + + Note: this does not overwrite an existing header with the same field + name. Use __delitem__() first to delete any existing headers. + """ + max_count = self.policy.header_max_count(name) + if max_count: + lname = name.lower() + found = 0 + for k, v in self._headers: + if k.lower() == lname: + found += 1 + if found >= max_count: + raise ValueError("There may be at most {} {} headers " + "in a message".format(max_count, name)) + self._headers.append(self.policy.header_store_parse(name, val)) + + def __delitem__(self, name): + """Delete all occurrences of a header, if present. + + Does not raise an exception if the header is missing. + """ + name = name.lower() + newheaders = list() + for k, v in self._headers: + if k.lower() != name: + newheaders.append((k, v)) + self._headers = newheaders + + def __contains__(self, name): + return name.lower() in [k.lower() for k, v in self._headers] + + def __iter__(self): + for field, value in self._headers: + yield field + + def keys(self): + """Return a list of all the message's header field names. + + These will be sorted in the order they appeared in the original + message, or were added to the message, and may contain duplicates. + Any fields deleted and re-inserted are always appended to the header + list. + """ + return [k for k, v in self._headers] + + def values(self): + """Return a list of all the message's header values. + + These will be sorted in the order they appeared in the original + message, or were added to the message, and may contain duplicates. + Any fields deleted and re-inserted are always appended to the header + list. 
+ """ + return [self.policy.header_fetch_parse(k, v) + for k, v in self._headers] + + def items(self): + """Get all the message's header fields and values. + + These will be sorted in the order they appeared in the original + message, or were added to the message, and may contain duplicates. + Any fields deleted and re-inserted are always appended to the header + list. + """ + return [(k, self.policy.header_fetch_parse(k, v)) + for k, v in self._headers] + + def get(self, name, failobj=None): + """Get a header value. + + Like __getitem__() but return failobj instead of None when the field + is missing. + """ + name = name.lower() + for k, v in self._headers: + if k.lower() == name: + return self.policy.header_fetch_parse(k, v) + return failobj + + # + # "Internal" methods (public API, but only intended for use by a parser + # or generator, not normal application code. + # + + def set_raw(self, name, value): + """Store name and value in the model without modification. + + This is an "internal" API, intended only for use by a parser. + """ + self._headers.append((name, value)) + + def raw_items(self): + """Return the (name, value) header pairs without modification. + + This is an "internal" API, intended only for use by a generator. + """ + return iter(self._headers.copy()) + + # + # Additional useful stuff + # + + def get_all(self, name, failobj=None): + """Return a list of all the values for the named field. + + These will be sorted in the order they appeared in the original + message, and may contain duplicates. Any fields deleted and + re-inserted are always appended to the header list. + + If no such fields exist, failobj is returned (defaults to None). + """ + values = [] + name = name.lower() + for k, v in self._headers: + if k.lower() == name: + values.append(self.policy.header_fetch_parse(k, v)) + if not values: + return failobj + return values + + def add_header(self, _name, _value, **_params): + """Extended header setting. 
+ + name is the header field to add. keyword arguments can be used to set + additional parameters for the header field, with underscores converted + to dashes. Normally the parameter will be added as key="value" unless + value is None, in which case only the key will be added. If a + parameter value contains non-ASCII characters it can be specified as a + three-tuple of (charset, language, value), in which case it will be + encoded according to RFC2231 rules. Otherwise it will be encoded using + the utf-8 charset and a language of ''. + + Examples: + + msg.add_header('content-disposition', 'attachment', filename='bud.gif') + msg.add_header('content-disposition', 'attachment', + filename=('utf-8', '', 'Fußballer.ppt')) + msg.add_header('content-disposition', 'attachment', + filename='Fußballer.ppt')) + """ + parts = [] + for k, v in _params.items(): + if v is None: + parts.append(k.replace('_', '-')) + else: + parts.append(_formatparam(k.replace('_', '-'), v)) + if _value is not None: + parts.insert(0, _value) + self[_name] = SEMISPACE.join(parts) + + def replace_header(self, _name, _value): + """Replace a header. + + Replace the first matching header found in the message, retaining + header order and case. If no matching header was found, a KeyError is + raised. + """ + _name = _name.lower() + for i, (k, v) in zip(range(len(self._headers)), self._headers): + if k.lower() == _name: + self._headers[i] = self.policy.header_store_parse(k, _value) + break + else: + raise KeyError(_name) + + # + # Use these three methods instead of the three above. + # + + def get_content_type(self): + """Return the message's content type. + + The returned string is coerced to lower case of the form + `maintype/subtype'. If there was no Content-Type header in the + message, the default type as given by get_default_type() will be + returned. Since according to RFC 2045, messages always have a default + type this will always return a value. 
+ + RFC 2045 defines a message's default type to be text/plain unless it + appears inside a multipart/digest container, in which case it would be + message/rfc822. + """ + missing = object() + value = self.get('content-type', missing) + if value is missing: + # This should have no parameters + return self.get_default_type() + ctype = _splitparam(value)[0].lower() + # RFC 2045, section 5.2 says if its invalid, use text/plain + if ctype.count('/') != 1: + return 'text/plain' + return ctype + + def get_content_maintype(self): + """Return the message's main content type. + + This is the `maintype' part of the string returned by + get_content_type(). + """ + ctype = self.get_content_type() + return ctype.split('/')[0] + + def get_content_subtype(self): + """Returns the message's sub-content type. + + This is the `subtype' part of the string returned by + get_content_type(). + """ + ctype = self.get_content_type() + return ctype.split('/')[1] + + def get_default_type(self): + """Return the `default' content type. + + Most messages have a default content type of text/plain, except for + messages that are subparts of multipart/digest containers. Such + subparts have a default content type of message/rfc822. + """ + return self._default_type + + def set_default_type(self, ctype): + """Set the `default' content type. + + ctype should be either "text/plain" or "message/rfc822", although this + is not enforced. The default content type is not stored in the + Content-Type header. + """ + self._default_type = ctype + + def _get_params_preserve(self, failobj, header): + # Like get_params() but preserves the quoting of values. BAW: + # should this be part of the public interface? 
+ missing = object() + value = self.get(header, missing) + if value is missing: + return failobj + params = [] + for p in _parseparam(value): + try: + name, val = p.split('=', 1) + name = name.strip() + val = val.strip() + except ValueError: + # Must have been a bare attribute + name = p.strip() + val = '' + params.append((name, val)) + params = utils.decode_params(params) + return params + + def get_params(self, failobj=None, header='content-type', unquote=True): + """Return the message's Content-Type parameters, as a list. + + The elements of the returned list are 2-tuples of key/value pairs, as + split on the `=' sign. The left hand side of the `=' is the key, + while the right hand side is the value. If there is no `=' sign in + the parameter the value is the empty string. The value is as + described in the get_param() method. + + Optional failobj is the object to return if there is no Content-Type + header. Optional header is the header to search instead of + Content-Type. If unquote is True, the value is unquoted. + """ + missing = object() + params = self._get_params_preserve(missing, header) + if params is missing: + return failobj + if unquote: + return [(k, _unquotevalue(v)) for k, v in params] + else: + return params + + def get_param(self, param, failobj=None, header='content-type', + unquote=True): + """Return the parameter value if found in the Content-Type header. + + Optional failobj is the object to return if there is no Content-Type + header, or the Content-Type header has no such parameter. Optional + header is the header to search instead of Content-Type. + + Parameter keys are always compared case insensitively. The return + value can either be a string, or a 3-tuple if the parameter was RFC + 2231 encoded. When it's a 3-tuple, the elements of the value are of + the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and + LANGUAGE can be None, in which case you should consider VALUE to be + encoded in the us-ascii charset. 
You can usually ignore LANGUAGE. + The parameter value (either the returned string, or the VALUE item in + the 3-tuple) is always unquoted, unless unquote is set to False. + + If your application doesn't care whether the parameter was RFC 2231 + encoded, it can turn the return value into a string as follows: + + param = msg.get_param('foo') + param = email.utils.collapse_rfc2231_value(rawparam) + + """ + if header not in self: + return failobj + for k, v in self._get_params_preserve(failobj, header): + if k.lower() == param.lower(): + if unquote: + return _unquotevalue(v) + else: + return v + return failobj + + def set_param(self, param, value, header='Content-Type', requote=True, + charset=None, language=''): + """Set a parameter in the Content-Type header. + + If the parameter already exists in the header, its value will be + replaced with the new value. + + If header is Content-Type and has not yet been defined for this + message, it will be set to "text/plain" and the new parameter and + value will be appended as per RFC 2045. + + An alternate header can specified in the header argument, and all + parameters will be quoted as necessary unless requote is False. + + If charset is specified, the parameter will be encoded according to RFC + 2231. Optional language specifies the RFC 2231 language, defaulting + to the empty string. Both charset and language should be strings. 
+ """ + if not isinstance(value, tuple) and charset: + value = (charset, language, value) + + if header not in self and header.lower() == 'content-type': + ctype = 'text/plain' + else: + ctype = self.get(header) + if not self.get_param(param, header=header): + if not ctype: + ctype = _formatparam(param, value, requote) + else: + ctype = SEMISPACE.join( + [ctype, _formatparam(param, value, requote)]) + else: + ctype = '' + for old_param, old_value in self.get_params(header=header, + unquote=requote): + append_param = '' + if old_param.lower() == param.lower(): + append_param = _formatparam(param, value, requote) + else: + append_param = _formatparam(old_param, old_value, requote) + if not ctype: + ctype = append_param + else: + ctype = SEMISPACE.join([ctype, append_param]) + if ctype != self.get(header): + del self[header] + self[header] = ctype + + def del_param(self, param, header='content-type', requote=True): + """Remove the given parameter completely from the Content-Type header. + + The header will be re-written in place without the parameter or its + value. All values will be quoted as necessary unless requote is + False. Optional header specifies an alternative to the Content-Type + header. + """ + if header not in self: + return + new_ctype = '' + for p, v in self.get_params(header=header, unquote=requote): + if p.lower() != param.lower(): + if not new_ctype: + new_ctype = _formatparam(p, v, requote) + else: + new_ctype = SEMISPACE.join([new_ctype, + _formatparam(p, v, requote)]) + if new_ctype != self.get(header): + del self[header] + self[header] = new_ctype + + def set_type(self, type, header='Content-Type', requote=True): + """Set the main type and subtype for the Content-Type header. + + type must be a string in the form "maintype/subtype", otherwise a + ValueError is raised. + + This method replaces the Content-Type header, keeping all the + parameters in place. If requote is False, this leaves the existing + header's quoting as is. 
Otherwise, the parameters will be quoted (the + default). + + An alternative header can be specified in the header argument. When + the Content-Type header is set, we'll always also add a MIME-Version + header. + """ + # BAW: should we be strict? + if not type.count('/') == 1: + raise ValueError + # Set the Content-Type, you get a MIME-Version + if header.lower() == 'content-type': + del self['mime-version'] + self['MIME-Version'] = '1.0' + if header not in self: + self[header] = type + return + params = self.get_params(header=header, unquote=requote) + del self[header] + self[header] = type + # Skip the first param; it's the old type. + for p, v in params[1:]: + self.set_param(p, v, header, requote) + + def get_filename(self, failobj=None): + """Return the filename associated with the payload if present. + + The filename is extracted from the Content-Disposition header's + `filename' parameter, and it is unquoted. If that header is missing + the `filename' parameter, this method falls back to looking for the + `name' parameter. + """ + missing = object() + filename = self.get_param('filename', missing, 'content-disposition') + if filename is missing: + filename = self.get_param('name', missing, 'content-type') + if filename is missing: + return failobj + return utils.collapse_rfc2231_value(filename).strip() + + def get_boundary(self, failobj=None): + """Return the boundary associated with the payload if present. + + The boundary is extracted from the Content-Type header's `boundary' + parameter, and it is unquoted. + """ + missing = object() + boundary = self.get_param('boundary', missing) + if boundary is missing: + return failobj + # RFC 2046 says that boundaries may begin but not end in w/s + return utils.collapse_rfc2231_value(boundary).rstrip() + + def set_boundary(self, boundary): + """Set the boundary parameter in Content-Type to 'boundary'. 
+ + This is subtly different than deleting the Content-Type header and + adding a new one with a new boundary parameter via add_header(). The + main difference is that using the set_boundary() method preserves the + order of the Content-Type header in the original message. + + HeaderParseError is raised if the message has no Content-Type header. + """ + missing = object() + params = self._get_params_preserve(missing, 'content-type') + if params is missing: + # There was no Content-Type header, and we don't know what type + # to set it to, so raise an exception. + raise errors.HeaderParseError('No Content-Type header found') + newparams = list() + foundp = False + for pk, pv in params: + if pk.lower() == 'boundary': + newparams.append(('boundary', '"%s"' % boundary)) + foundp = True + else: + newparams.append((pk, pv)) + if not foundp: + # The original Content-Type header had no boundary attribute. + # Tack one on the end. BAW: should we raise an exception + # instead??? + newparams.append(('boundary', '"%s"' % boundary)) + # Replace the existing Content-Type header with the new value + newheaders = list() + for h, v in self._headers: + if h.lower() == 'content-type': + parts = list() + for k, v in newparams: + if v == '': + parts.append(k) + else: + parts.append('%s=%s' % (k, v)) + val = SEMISPACE.join(parts) + newheaders.append(self.policy.header_store_parse(h, val)) + + else: + newheaders.append((h, v)) + self._headers = newheaders + + def get_content_charset(self, failobj=None): + """Return the charset parameter of the Content-Type header. + + The returned string is always coerced to lower case. If there is no + Content-Type header, or if that header has no charset parameter, + failobj is returned. + """ + missing = object() + charset = self.get_param('charset', missing) + if charset is missing: + return failobj + if isinstance(charset, tuple): + # RFC 2231 encoded, so decode it, and it better end up as ascii. 
+ pcharset = charset[0] or 'us-ascii' + try: + # LookupError will be raised if the charset isn't known to + # Python. UnicodeError will be raised if the encoded text + # contains a character not in the charset. + as_bytes = charset[2].encode('raw-unicode-escape') + charset = str(as_bytes, pcharset) + except (LookupError, UnicodeError): + charset = charset[2] + # charset characters must be in us-ascii range + try: + charset.encode('us-ascii') + except UnicodeError: + return failobj + # RFC 2046, $4.1.2 says charsets are not case sensitive + return charset.lower() + + def get_charsets(self, failobj=None): + """Return a list containing the charset(s) used in this message. + + The returned list of items describes the Content-Type headers' + charset parameter for this message and all the subparts in its + payload. + + Each item will either be a string (the value of the charset parameter + in the Content-Type header of that part) or the value of the + 'failobj' parameter (defaults to None), if the part does not have a + main MIME type of "text", or the charset is not defined. + + The list will contain one string for each part of the message, plus + one for the container message (i.e. self), so that a non-multipart + message will still return a list of length 1. + """ + return [part.get_content_charset(failobj) for part in self.walk()] + + # I.e. def walk(self): ... 
+ from future.backports.email.iterators import walk diff --git a/future/standard_library/http/__init__.py b/src/future/backports/email/mime/__init__.py similarity index 100% rename from future/standard_library/http/__init__.py rename to src/future/backports/email/mime/__init__.py diff --git a/src/future/backports/email/mime/application.py b/src/future/backports/email/mime/application.py new file mode 100644 index 00000000..5cbfb174 --- /dev/null +++ b/src/future/backports/email/mime/application.py @@ -0,0 +1,39 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Keith Dart +# Contact: email-sig@python.org + +"""Class representing application/* type MIME documents.""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import + +from future.backports.email import encoders +from future.backports.email.mime.nonmultipart import MIMENonMultipart + +__all__ = ["MIMEApplication"] + + +class MIMEApplication(MIMENonMultipart): + """Class for generating application/* MIME documents.""" + + def __init__(self, _data, _subtype='octet-stream', + _encoder=encoders.encode_base64, **_params): + """Create an application/* type MIME document. + + _data is a string containing the raw application data. + + _subtype is the MIME content type subtype, defaulting to + 'octet-stream'. + + _encoder is a function which will perform the actual encoding for + transport of the application data, defaulting to base64 encoding. + + Any additional keyword arguments are passed to the base class + constructor, which turns them into parameters on the Content-Type + header. 
+ """ + if _subtype is None: + raise TypeError('Invalid application MIME subtype') + MIMENonMultipart.__init__(self, 'application', _subtype, **_params) + self.set_payload(_data) + _encoder(self) diff --git a/src/future/backports/email/mime/audio.py b/src/future/backports/email/mime/audio.py new file mode 100644 index 00000000..4989c114 --- /dev/null +++ b/src/future/backports/email/mime/audio.py @@ -0,0 +1,74 @@ +# Copyright (C) 2001-2007 Python Software Foundation +# Author: Anthony Baxter +# Contact: email-sig@python.org + +"""Class representing audio/* type MIME documents.""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import + +__all__ = ['MIMEAudio'] + +import sndhdr + +from io import BytesIO +from future.backports.email import encoders +from future.backports.email.mime.nonmultipart import MIMENonMultipart + + +_sndhdr_MIMEmap = {'au' : 'basic', + 'wav' :'x-wav', + 'aiff':'x-aiff', + 'aifc':'x-aiff', + } + +# There are others in sndhdr that don't have MIME types. :( +# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma?? +def _whatsnd(data): + """Try to identify a sound file type. + + sndhdr.what() has a pretty cruddy interface, unfortunately. This is why + we re-do it here. It would be easier to reverse engineer the Unix 'file' + command and use the standard 'magic' file, as shipped with a modern Unix. + """ + hdr = data[:512] + fakefile = BytesIO(hdr) + for testfn in sndhdr.tests: + res = testfn(hdr, fakefile) + if res is not None: + return _sndhdr_MIMEmap.get(res[0]) + return None + + +class MIMEAudio(MIMENonMultipart): + """Class for generating audio/* MIME documents.""" + + def __init__(self, _audiodata, _subtype=None, + _encoder=encoders.encode_base64, **_params): + """Create an audio/* type MIME document. + + _audiodata is a string containing the raw audio data. 
class MIMEBase(message.Message):
    """Base class for MIME specializations."""

    def __init__(self, _maintype, _subtype, **_params):
        """Add a Content-Type: and a MIME-Version: header.

        The Content-Type: header value is built from the _maintype and
        _subtype arguments; any keyword arguments become additional
        parameters on that header.
        """
        message.Message.__init__(self)
        self.add_header('Content-Type', '%s/%s' % (_maintype, _subtype),
                        **_params)
        self['MIME-Version'] = '1.0'
+ """ + if _subtype is None: + _subtype = imghdr.what(None, _imagedata) + if _subtype is None: + raise TypeError('Could not guess image MIME subtype') + MIMENonMultipart.__init__(self, 'image', _subtype, **_params) + self.set_payload(_imagedata) + _encoder(self) diff --git a/src/future/backports/email/mime/message.py b/src/future/backports/email/mime/message.py new file mode 100644 index 00000000..7f920751 --- /dev/null +++ b/src/future/backports/email/mime/message.py @@ -0,0 +1,36 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Class representing message/* MIME documents.""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import + +__all__ = ['MIMEMessage'] + +from future.backports.email import message +from future.backports.email.mime.nonmultipart import MIMENonMultipart + + +class MIMEMessage(MIMENonMultipart): + """Class representing message/* MIME documents.""" + + def __init__(self, _msg, _subtype='rfc822'): + """Create a message/* type MIME document. + + _msg is a message object and must be an instance of Message, or a + derived class of Message, otherwise a TypeError is raised. + + Optional _subtype defines the subtype of the contained message. The + default is "rfc822" (this is defined by the MIME standard, even though + the term "rfc822" is technically outdated by RFC 2822). + """ + MIMENonMultipart.__init__(self, 'message', _subtype) + if not isinstance(_msg, message.Message): + raise TypeError('Argument is not an instance of Message') + # It's convenient to use this base class method. 
We need to do it + # this way or we'll get an exception + message.Message.attach(self, _msg) + # And be sure our default type is set correctly + self.set_default_type('message/rfc822') diff --git a/src/future/backports/email/mime/multipart.py b/src/future/backports/email/mime/multipart.py new file mode 100644 index 00000000..6d7ed3dc --- /dev/null +++ b/src/future/backports/email/mime/multipart.py @@ -0,0 +1,49 @@ +# Copyright (C) 2002-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Base class for MIME multipart/* type messages.""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import + +__all__ = ['MIMEMultipart'] + +from future.backports.email.mime.base import MIMEBase + + +class MIMEMultipart(MIMEBase): + """Base class for MIME multipart/* type messages.""" + + def __init__(self, _subtype='mixed', boundary=None, _subparts=None, + **_params): + """Creates a multipart/* type message. + + By default, creates a multipart/mixed message, with proper + Content-Type and MIME-Version headers. + + _subtype is the subtype of the multipart content type, defaulting to + `mixed'. + + boundary is the multipart boundary string. By default it is + calculated as needed. + + _subparts is a sequence of initial subparts for the payload. It + must be an iterable object, such as a list. You can always + attach new subparts to the message by using the attach() method. + + Additional parameters for the Content-Type header are taken from the + keyword arguments (or passed into the _params argument). + """ + MIMEBase.__init__(self, 'multipart', _subtype, **_params) + + # Initialise _payload to an empty list as the Message superclass's + # implementation of is_multipart assumes that _payload is a list for + # multipart messages. 
class MIMENonMultipart(MIMEBase):
    """Base class for MIME non-multipart type messages (e.g. text/*, image/*).

    These parts cannot contain subparts, so attach() always raises.
    (The upstream docstring mistakenly said "multipart/*"; corrected.)
    """

    def attach(self, payload):
        """Refuse to attach a subpart.

        The public API prohibits attaching multiple subparts to MIMEBase
        derived subtypes since none of them are, by definition, of content
        type multipart/*.

        Raises:
            errors.MultipartConversionError: always.
        """
        raise errors.MultipartConversionError(
            'Cannot attach additional subparts to non-multipart/*')
"""Create a text/* type MIME document. + + _text is the string for this message object. + + _subtype is the MIME sub content type, defaulting to "plain". + + _charset is the character set parameter added to the Content-Type + header. This defaults to "us-ascii". Note that as a side-effect, the + Content-Transfer-Encoding header will also be set. + """ + + # If no _charset was specified, check to see if there are non-ascii + # characters present. If not, use 'us-ascii', otherwise use utf-8. + # XXX: This can be removed once #7304 is fixed. + if _charset is None: + try: + _text.encode('us-ascii') + _charset = 'us-ascii' + except UnicodeEncodeError: + _charset = 'utf-8' + + MIMENonMultipart.__init__(self, 'text', _subtype, + **{'charset': _charset}) + + self.set_payload(_text, _charset) diff --git a/src/future/backports/email/parser.py b/src/future/backports/email/parser.py new file mode 100644 index 00000000..79f0e5a3 --- /dev/null +++ b/src/future/backports/email/parser.py @@ -0,0 +1,135 @@ +# Copyright (C) 2001-2007 Python Software Foundation +# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter +# Contact: email-sig@python.org + +"""A parser of RFC 2822 and MIME email messages.""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import + +__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser'] + +import warnings +from io import StringIO, TextIOWrapper + +from future.backports.email.feedparser import FeedParser, BytesFeedParser +from future.backports.email.message import Message +from future.backports.email._policybase import compat32 + + +class Parser(object): + def __init__(self, _class=Message, **_3to2kwargs): + """Parser of RFC 2822 and MIME email messages. + + Creates an in-memory object tree representing the email message, which + can then be manipulated and turned over to a Generator to return the + textual representation of the message. 
+ + The string must be formatted as a block of RFC 2822 headers and header + continuation lines, optionally preceded by a `Unix-from' header. The + header block is terminated either by the end of the string or by a + blank line. + + _class is the class to instantiate for new message objects when they + must be created. This class must have a constructor that can take + zero arguments. Default is Message.Message. + + The policy keyword specifies a policy object that controls a number of + aspects of the parser's operation. The default policy maintains + backward compatibility. + + """ + if 'policy' in _3to2kwargs: policy = _3to2kwargs['policy']; del _3to2kwargs['policy'] + else: policy = compat32 + self._class = _class + self.policy = policy + + def parse(self, fp, headersonly=False): + """Create a message structure from the data in a file. + + Reads all the data from the file and returns the root of the message + structure. Optional headersonly is a flag specifying whether to stop + parsing after reading the headers or not. The default is False, + meaning it parses the entire contents of the file. + """ + feedparser = FeedParser(self._class, policy=self.policy) + if headersonly: + feedparser._set_headersonly() + while True: + data = fp.read(8192) + if not data: + break + feedparser.feed(data) + return feedparser.close() + + def parsestr(self, text, headersonly=False): + """Create a message structure from a string. + + Returns the root of the message structure. Optional headersonly is a + flag specifying whether to stop parsing after reading the headers or + not. The default is False, meaning it parses the entire contents of + the file. 
+ """ + return self.parse(StringIO(text), headersonly=headersonly) + + + +class HeaderParser(Parser): + def parse(self, fp, headersonly=True): + return Parser.parse(self, fp, True) + + def parsestr(self, text, headersonly=True): + return Parser.parsestr(self, text, True) + + +class BytesParser(object): + + def __init__(self, *args, **kw): + """Parser of binary RFC 2822 and MIME email messages. + + Creates an in-memory object tree representing the email message, which + can then be manipulated and turned over to a Generator to return the + textual representation of the message. + + The input must be formatted as a block of RFC 2822 headers and header + continuation lines, optionally preceded by a `Unix-from' header. The + header block is terminated either by the end of the input or by a + blank line. + + _class is the class to instantiate for new message objects when they + must be created. This class must have a constructor that can take + zero arguments. Default is Message.Message. + """ + self.parser = Parser(*args, **kw) + + def parse(self, fp, headersonly=False): + """Create a message structure from the data in a binary file. + + Reads all the data from the file and returns the root of the message + structure. Optional headersonly is a flag specifying whether to stop + parsing after reading the headers or not. The default is False, + meaning it parses the entire contents of the file. + """ + fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape') + with fp: + return self.parser.parse(fp, headersonly) + + + def parsebytes(self, text, headersonly=False): + """Create a message structure from a byte string. + + Returns the root of the message structure. Optional headersonly is a + flag specifying whether to stop parsing after reading the headers or + not. The default is False, meaning it parses the entire contents of + the file. 
+ """ + text = text.decode('ASCII', errors='surrogateescape') + return self.parser.parsestr(text, headersonly) + + +class BytesHeaderParser(BytesParser): + def parse(self, fp, headersonly=True): + return BytesParser.parse(self, fp, headersonly=True) + + def parsebytes(self, text, headersonly=True): + return BytesParser.parsebytes(self, text, headersonly=True) diff --git a/src/future/backports/email/policy.py b/src/future/backports/email/policy.py new file mode 100644 index 00000000..2f609a23 --- /dev/null +++ b/src/future/backports/email/policy.py @@ -0,0 +1,193 @@ +"""This will be the home for the policy that hooks in the new +code that adds all the email6 features. +""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from future.builtins import super + +from future.standard_library.email._policybase import (Policy, Compat32, + compat32, _extend_docstrings) +from future.standard_library.email.utils import _has_surrogates +from future.standard_library.email.headerregistry import HeaderRegistry as HeaderRegistry + +__all__ = [ + 'Compat32', + 'compat32', + 'Policy', + 'EmailPolicy', + 'default', + 'strict', + 'SMTP', + 'HTTP', + ] + +@_extend_docstrings +class EmailPolicy(Policy): + + """+ + PROVISIONAL + + The API extensions enabled by this policy are currently provisional. + Refer to the documentation for details. + + This policy adds new header parsing and folding algorithms. Instead of + simple strings, headers are custom objects with custom attributes + depending on the type of the field. The folding algorithm fully + implements RFCs 2047 and 5322. 
    # Controls whether parsed source values are refolded on output; see
    # the class docstring for the 'none' / 'long' / 'all' values.
    refold_source = 'long'

    # Default registry mapping header field names to custom header classes.
    # NOTE: this class attribute is shared; __init__ below gives each new
    # (non-clone) instance its own registry.
    header_factory = HeaderRegistry()

    def __init__(self, **kw):
        # Ensure that each new instance gets a unique header factory
        # (as opposed to clones, which share the factory).
        if 'header_factory' not in kw:
            # object.__setattr__ is used here, presumably because the Policy
            # base class blocks ordinary attribute assignment -- confirm in
            # _policybase.
            object.__setattr__(self, 'header_factory', HeaderRegistry())
        super().__init__(**kw)

    def header_max_count(self, name):
        """+
        The implementation for this class returns the max_count attribute from
        the specialized header class that would be used to construct a header
        of type 'name'.
        """
        # The leading '+' in this docstring is significant: the
        # _extend_docstrings decorator appends the base class docstring to it.
        return self.header_factory[name].max_count
This allows a + # Message object constructed with this policy to be passed to a library + # that only handles Compat32 objects, or to receive such an object and + # convert it to use the newer style by just changing its policy. It is + # also chosen because it postpones the relatively expensive full rfc5322 + # parse until as late as possible when parsing from source, since in many + # applications only a few headers will actually be inspected. + + def header_source_parse(self, sourcelines): + """+ + The name is parsed as everything up to the ':' and returned unmodified. + The value is determined by stripping leading whitespace off the + remainder of the first line, joining all subsequent lines together, and + stripping any trailing carriage return or linefeed characters. (This + is the same as Compat32). + + """ + name, value = sourcelines[0].split(':', 1) + value = value.lstrip(' \t') + ''.join(sourcelines[1:]) + return (name, value.rstrip('\r\n')) + + def header_store_parse(self, name, value): + """+ + The name is returned unchanged. If the input value has a 'name' + attribute and it matches the name ignoring case, the value is returned + unchanged. Otherwise the name and value are passed to header_factory + method, and the resulting custom header object is returned as the + value. In this case a ValueError is raised if the input value contains + CR or LF characters. + + """ + if hasattr(value, 'name') and value.name.lower() == name.lower(): + return (name, value) + if isinstance(value, str) and len(value.splitlines())>1: + raise ValueError("Header values may not contain linefeed " + "or carriage return characters") + return (name, self.header_factory(name, value)) + + def header_fetch_parse(self, name, value): + """+ + If the value has a 'name' attribute, it is returned to unmodified. + Otherwise the name and the value with any linesep characters removed + are passed to the header_factory method, and the resulting custom + header object is returned. 
Any surrogateescaped bytes get turned + into the unicode unknown-character glyph. + + """ + if hasattr(value, 'name'): + return value + return self.header_factory(name, ''.join(value.splitlines())) + + def fold(self, name, value): + """+ + Header folding is controlled by the refold_source policy setting. A + value is considered to be a 'source value' if and only if it does not + have a 'name' attribute (having a 'name' attribute means it is a header + object of some sort). If a source value needs to be refolded according + to the policy, it is converted into a custom header object by passing + the name and the value with any linesep characters removed to the + header_factory method. Folding of a custom header object is done by + calling its fold method with the current policy. + + Source values are split into lines using splitlines. If the value is + not to be refolded, the lines are rejoined using the linesep from the + policy and returned. The exception is lines containing non-ascii + binary data. In that case the value is refolded regardless of the + refold_source setting, which causes the binary data to be CTE encoded + using the unknown-8bit charset. + + """ + return self._fold(name, value, refold_binary=True) + + def fold_binary(self, name, value): + """+ + The same as fold if cte_type is 7bit, except that the returned value is + bytes. + + If cte_type is 8bit, non-ASCII binary data is converted back into + bytes. Headers with binary data are not refolded, regardless of the + refold_header setting, since there is no way to know whether the binary + data consists of single byte characters or multibyte characters. 
+ + """ + folded = self._fold(name, value, refold_binary=self.cte_type=='7bit') + return folded.encode('ascii', 'surrogateescape') + + def _fold(self, name, value, refold_binary=False): + if hasattr(value, 'name'): + return value.fold(policy=self) + maxlen = self.max_line_length if self.max_line_length else float('inf') + lines = value.splitlines() + refold = (self.refold_source == 'all' or + self.refold_source == 'long' and + (lines and len(lines[0])+len(name)+2 > maxlen or + any(len(x) > maxlen for x in lines[1:]))) + if refold or refold_binary and _has_surrogates(value): + return self.header_factory(name, ''.join(lines)).fold(policy=self) + return name + ': ' + self.linesep.join(lines) + self.linesep + + +default = EmailPolicy() +# Make the default policy use the class default header_factory +del default.header_factory +strict = default.clone(raise_on_defect=True) +SMTP = default.clone(linesep='\r\n') +HTTP = default.clone(linesep='\r\n', max_line_length=None) diff --git a/src/future/backports/email/quoprimime.py b/src/future/backports/email/quoprimime.py new file mode 100644 index 00000000..b69d158b --- /dev/null +++ b/src/future/backports/email/quoprimime.py @@ -0,0 +1,326 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Ben Gertzfield +# Contact: email-sig@python.org + +"""Quoted-printable content transfer encoding per RFCs 2045-2047. + +This module handles the content transfer encoding method defined in RFC 2045 +to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to +safely encode text that is in a character set similar to the 7-bit US ASCII +character set, but that includes some 8-bit characters that are normally not +allowed in email bodies or headers. + +Quoted-printable is very space-inefficient for encoding binary files; use the +email.base64mime module for that instead. + +This module provides an interface to encode and decode both headers and bodies +with quoted-printable encoding. 
+ +RFC 2045 defines a method for including character set information in an +`encoded-word' in a header. This method is commonly used for 8-bit real names +in To:/From:/Cc: etc. fields, as well as Subject: lines. + +This module does not do the line wrapping or end-of-line character +conversion necessary for proper internationalized headers; it only +does dumb encoding and decoding. To deal with the various line +wrapping issues, use the email.header module. +""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from future.builtins import bytes, chr, dict, int, range, super + +__all__ = [ + 'body_decode', + 'body_encode', + 'body_length', + 'decode', + 'decodestring', + 'header_decode', + 'header_encode', + 'header_length', + 'quote', + 'unquote', + ] + +import re +import io + +from string import ascii_letters, digits, hexdigits + +CRLF = '\r\n' +NL = '\n' +EMPTYSTRING = '' + +# Build a mapping of octets to the expansion of that octet. Since we're only +# going to have 256 of these things, this isn't terribly inefficient +# space-wise. Remember that headers and bodies have different sets of safe +# characters. Initialize both maps with the full expansion, and then override +# the safe bytes with the more compact form. +_QUOPRI_HEADER_MAP = dict((c, '=%02X' % c) for c in range(256)) +_QUOPRI_BODY_MAP = _QUOPRI_HEADER_MAP.copy() + +# Safe header bytes which need no encoding. +for c in bytes(b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii')): + _QUOPRI_HEADER_MAP[c] = chr(c) +# Headers have one other special encoding; spaces become underscores. +_QUOPRI_HEADER_MAP[ord(' ')] = '_' + +# Safe body bytes which need no encoding. 
+for c in bytes(b' !"#$%&\'()*+,-./0123456789:;<>' + b'?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`' + b'abcdefghijklmnopqrstuvwxyz{|}~\t'): + _QUOPRI_BODY_MAP[c] = chr(c) + + + +# Helpers +def header_check(octet): + """Return True if the octet should be escaped with header quopri.""" + return chr(octet) != _QUOPRI_HEADER_MAP[octet] + + +def body_check(octet): + """Return True if the octet should be escaped with body quopri.""" + return chr(octet) != _QUOPRI_BODY_MAP[octet] + + +def header_length(bytearray): + """Return a header quoted-printable encoding length. + + Note that this does not include any RFC 2047 chrome added by + `header_encode()`. + + :param bytearray: An array of bytes (a.k.a. octets). + :return: The length in bytes of the byte array when it is encoded with + quoted-printable for headers. + """ + return sum(len(_QUOPRI_HEADER_MAP[octet]) for octet in bytearray) + + +def body_length(bytearray): + """Return a body quoted-printable encoding length. + + :param bytearray: An array of bytes (a.k.a. octets). + :return: The length in bytes of the byte array when it is encoded with + quoted-printable for bodies. + """ + return sum(len(_QUOPRI_BODY_MAP[octet]) for octet in bytearray) + + +def _max_append(L, s, maxlen, extra=''): + if not isinstance(s, str): + s = chr(s) + if not L: + L.append(s.lstrip()) + elif len(L[-1]) + len(s) <= maxlen: + L[-1] += extra + s + else: + L.append(s.lstrip()) + + +def unquote(s): + """Turn a string in the form =AB to the ASCII character with value 0xab""" + return chr(int(s[1:3], 16)) + + +def quote(c): + return '=%02X' % ord(c) + + + +def header_encode(header_bytes, charset='iso-8859-1'): + """Encode a single header line with quoted-printable (like) encoding. + + Defined in RFC 2045, this `Q' encoding is similar to quoted-printable, but + used specifically for email header fields to allow charsets with mostly 7 + bit characters (and some 8 bit) to remain more or less readable in non-RFC + 2045 aware mail clients. 
+ + charset names the character set to use in the RFC 2046 header. It + defaults to iso-8859-1. + """ + # Return empty headers as an empty string. + if not header_bytes: + return '' + # Iterate over every byte, encoding if necessary. + encoded = [] + for octet in header_bytes: + encoded.append(_QUOPRI_HEADER_MAP[octet]) + # Now add the RFC chrome to each encoded chunk and glue the chunks + # together. + return '=?%s?q?%s?=' % (charset, EMPTYSTRING.join(encoded)) + + +class _body_accumulator(io.StringIO): + + def __init__(self, maxlinelen, eol, *args, **kw): + super().__init__(*args, **kw) + self.eol = eol + self.maxlinelen = self.room = maxlinelen + + def write_str(self, s): + """Add string s to the accumulated body.""" + self.write(s) + self.room -= len(s) + + def newline(self): + """Write eol, then start new line.""" + self.write_str(self.eol) + self.room = self.maxlinelen + + def write_soft_break(self): + """Write a soft break, then start a new line.""" + self.write_str('=') + self.newline() + + def write_wrapped(self, s, extra_room=0): + """Add a soft line break if needed, then write s.""" + if self.room < len(s) + extra_room: + self.write_soft_break() + self.write_str(s) + + def write_char(self, c, is_last_char): + if not is_last_char: + # Another character follows on this line, so we must leave + # extra room, either for it or a soft break, and whitespace + # need not be quoted. + self.write_wrapped(c, extra_room=1) + elif c not in ' \t': + # For this and remaining cases, no more characters follow, + # so there is no need to reserve extra room (since a hard + # break will immediately follow). + self.write_wrapped(c) + elif self.room >= 3: + # It's a whitespace character at end-of-line, and we have room + # for the three-character quoted encoding. + self.write(quote(c)) + elif self.room == 2: + # There's room for the whitespace character and a soft break. + self.write(c) + self.write_soft_break() + else: + # There's room only for a soft break. 
The quoted whitespace + # will be the only content on the subsequent line. + self.write_soft_break() + self.write(quote(c)) + + +def body_encode(body, maxlinelen=76, eol=NL): + """Encode with quoted-printable, wrapping at maxlinelen characters. + + Each line of encoded text will end with eol, which defaults to "\\n". Set + this to "\\r\\n" if you will be using the result of this function directly + in an email. + + Each line will be wrapped at, at most, maxlinelen characters before the + eol string (maxlinelen defaults to 76 characters, the maximum value + permitted by RFC 2045). Long lines will have the 'soft line break' + quoted-printable character "=" appended to them, so the decoded text will + be identical to the original text. + + The minimum maxlinelen is 4 to have room for a quoted character ("=XX") + followed by a soft line break. Smaller values will generate a + ValueError. + + """ + + if maxlinelen < 4: + raise ValueError("maxlinelen must be at least 4") + if not body: + return body + + # The last line may or may not end in eol, but all other lines do. + last_has_eol = (body[-1] in '\r\n') + + # This accumulator will make it easier to build the encoded body. + encoded_body = _body_accumulator(maxlinelen, eol) + + lines = body.splitlines() + last_line_no = len(lines) - 1 + for line_no, line in enumerate(lines): + last_char_index = len(line) - 1 + for i, c in enumerate(line): + if body_check(ord(c)): + c = quote(c) + encoded_body.write_char(c, i==last_char_index) + # Add an eol if input line had eol. All input lines have eol except + # possibly the last one. + if line_no < last_line_no or last_has_eol: + encoded_body.newline() + + return encoded_body.getvalue() + + + +# BAW: I'm not sure if the intent was for the signature of this function to be +# the same as base64MIME.decode() or not... +def decode(encoded, eol=NL): + """Decode a quoted-printable string. + + Lines are separated with eol, which defaults to \\n. 
+ """ + if not encoded: + return encoded + # BAW: see comment in encode() above. Again, we're building up the + # decoded string with string concatenation, which could be done much more + # efficiently. + decoded = '' + + for line in encoded.splitlines(): + line = line.rstrip() + if not line: + decoded += eol + continue + + i = 0 + n = len(line) + while i < n: + c = line[i] + if c != '=': + decoded += c + i += 1 + # Otherwise, c == "=". Are we at the end of the line? If so, add + # a soft line break. + elif i+1 == n: + i += 1 + continue + # Decode if in form =AB + elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits: + decoded += unquote(line[i:i+3]) + i += 3 + # Otherwise, not in form =AB, pass literally + else: + decoded += c + i += 1 + + if i == n: + decoded += eol + # Special case if original string did not end with eol + if encoded[-1] not in '\r\n' and decoded.endswith(eol): + decoded = decoded[:-1] + return decoded + + +# For convenience and backwards compatibility w/ standard base64 module +body_decode = decode +decodestring = decode + + + +def _unquote_match(match): + """Turn a match in the form =AB to the ASCII character with value 0xab""" + s = match.group(0) + return unquote(s) + + +# Header decoding is done a bit differently +def header_decode(s): + """Decode a string encoded with RFC 2045 MIME header `Q' encoding. + + This function does not parse a full MIME header value encoded with + quoted-printable (like =?iso-8895-1?q?Hello_World?=) -- please use + the high level email.header class for that functionality. 
+ """ + s = s.replace('_', ' ') + return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s, re.ASCII) diff --git a/src/future/backports/email/utils.py b/src/future/backports/email/utils.py new file mode 100644 index 00000000..4abebf7c --- /dev/null +++ b/src/future/backports/email/utils.py @@ -0,0 +1,400 @@ +# Copyright (C) 2001-2010 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Miscellaneous utilities.""" + +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from future import utils +from future.builtins import bytes, int, str + +__all__ = [ + 'collapse_rfc2231_value', + 'decode_params', + 'decode_rfc2231', + 'encode_rfc2231', + 'formataddr', + 'formatdate', + 'format_datetime', + 'getaddresses', + 'make_msgid', + 'mktime_tz', + 'parseaddr', + 'parsedate', + 'parsedate_tz', + 'parsedate_to_datetime', + 'unquote', + ] + +import os +import re +if utils.PY2: + re.ASCII = 0 +import time +import base64 +import random +import socket +from future.backports import datetime +from future.backports.urllib.parse import quote as url_quote, unquote as url_unquote +import warnings +from io import StringIO + +from future.backports.email._parseaddr import quote +from future.backports.email._parseaddr import AddressList as _AddressList +from future.backports.email._parseaddr import mktime_tz + +from future.backports.email._parseaddr import parsedate, parsedate_tz, _parsedate_tz + +from quopri import decodestring as _qdecode + +# Intrapackage imports +from future.backports.email.encoders import _bencode, _qencode +from future.backports.email.charset import Charset + +COMMASPACE = ', ' +EMPTYSTRING = '' +UEMPTYSTRING = '' +CRLF = '\r\n' +TICK = "'" + +specialsre = re.compile(r'[][\\()<>@,:;".]') +escapesre = re.compile(r'[\\"]') + +# How to figure out if we are processing strings that come from a byte +# source with undecodable characters. 
+_has_surrogates = re.compile( + '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search + +# How to deal with a string containing bytes before handing it to the +# application through the 'normal' interface. +def _sanitize(string): + # Turn any escaped bytes into unicode 'unknown' char. + original_bytes = string.encode('ascii', 'surrogateescape') + return original_bytes.decode('ascii', 'replace') + + +# Helpers + +def formataddr(pair, charset='utf-8'): + """The inverse of parseaddr(), this takes a 2-tuple of the form + (realname, email_address) and returns the string value suitable + for an RFC 2822 From, To or Cc header. + + If the first element of pair is false, then the second element is + returned unmodified. + + Optional charset if given is the character set that is used to encode + realname in case realname is not ASCII safe. Can be an instance of str or + a Charset-like object which has a header_encode method. Default is + 'utf-8'. + """ + name, address = pair + # The address MUST (per RFC) be ascii, so raise an UnicodeError if it isn't. + address.encode('ascii') + if name: + try: + name.encode('ascii') + except UnicodeEncodeError: + if isinstance(charset, str): + charset = Charset(charset) + encoded_name = charset.header_encode(name) + return "%s <%s>" % (encoded_name, address) + else: + quotes = '' + if specialsre.search(name): + quotes = '"' + name = escapesre.sub(r'\\\g<0>', name) + return '%s%s%s <%s>' % (quotes, name, quotes, address) + return address + + + +def getaddresses(fieldvalues): + """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" + all = COMMASPACE.join(fieldvalues) + a = _AddressList(all) + return a.addresslist + + + +ecre = re.compile(r''' + =\? # literal =? + (?P[^?]*?) # non-greedy up to the next ? is the charset + \? # literal ? + (?P[qb]) # either a "q" or a "b", case insensitive + \? # literal ? + (?P.*?) 
# non-greedy up to the next ?= is the atom + \?= # literal ?= + ''', re.VERBOSE | re.IGNORECASE) + + +def _format_timetuple_and_zone(timetuple, zone): + return '%s, %02d %s %04d %02d:%02d:%02d %s' % ( + ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][timetuple[6]], + timetuple[2], + ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][timetuple[1] - 1], + timetuple[0], timetuple[3], timetuple[4], timetuple[5], + zone) + +def formatdate(timeval=None, localtime=False, usegmt=False): + """Returns a date string as specified by RFC 2822, e.g.: + + Fri, 09 Nov 2001 01:08:47 -0000 + + Optional timeval if given is a floating point time value as accepted by + gmtime() and localtime(), otherwise the current time is used. + + Optional localtime is a flag that when True, interprets timeval, and + returns a date relative to the local timezone instead of UTC, properly + taking daylight savings time into account. + + Optional argument usegmt means that the timezone is written out as + an ascii string, not numeric one (so "GMT" instead of "+0000"). This + is needed for HTTP, and is only used when localtime==False. + """ + # Note: we cannot use strftime() because that honors the locale and RFC + # 2822 requires that day and month names be the English abbreviations. + if timeval is None: + timeval = time.time() + if localtime: + now = time.localtime(timeval) + # Calculate timezone offset, based on whether the local zone has + # daylight savings time, and whether DST is in effect. + if time.daylight and now[-1]: + offset = time.altzone + else: + offset = time.timezone + hours, minutes = divmod(abs(offset), 3600) + # Remember offset is in seconds west of UTC, but the timezone is in + # minutes east of UTC, so the signs differ. 
+ if offset > 0: + sign = '-' + else: + sign = '+' + zone = '%s%02d%02d' % (sign, hours, minutes // 60) + else: + now = time.gmtime(timeval) + # Timezone offset is always -0000 + if usegmt: + zone = 'GMT' + else: + zone = '-0000' + return _format_timetuple_and_zone(now, zone) + +def format_datetime(dt, usegmt=False): + """Turn a datetime into a date string as specified in RFC 2822. + + If usegmt is True, dt must be an aware datetime with an offset of zero. In + this case 'GMT' will be rendered instead of the normal +0000 required by + RFC2822. This is to support HTTP headers involving date stamps. + """ + now = dt.timetuple() + if usegmt: + if dt.tzinfo is None or dt.tzinfo != datetime.timezone.utc: + raise ValueError("usegmt option requires a UTC datetime") + zone = 'GMT' + elif dt.tzinfo is None: + zone = '-0000' + else: + zone = dt.strftime("%z") + return _format_timetuple_and_zone(now, zone) + + +def make_msgid(idstring=None, domain=None): + """Returns a string suitable for RFC 2822 compliant Message-ID, e.g: + + <20020201195627.33539.96671@nightshade.la.mastaler.com> + + Optional idstring if given is a string used to strengthen the + uniqueness of the message id. Optional domain if given provides the + portion of the message id after the '@'. It defaults to the locally + defined hostname. + """ + timeval = time.time() + utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval)) + pid = os.getpid() + randint = random.randrange(100000) + if idstring is None: + idstring = '' + else: + idstring = '.' 
+ idstring + if domain is None: + domain = socket.getfqdn() + msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, domain) + return msgid + + +def parsedate_to_datetime(data): + _3to2list = list(_parsedate_tz(data)) + dtuple, tz, = [_3to2list[:-1]] + _3to2list[-1:] + if tz is None: + return datetime.datetime(*dtuple[:6]) + return datetime.datetime(*dtuple[:6], + tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) + + +def parseaddr(addr): + addrs = _AddressList(addr).addresslist + if not addrs: + return '', '' + return addrs[0] + + +# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3. +def unquote(str): + """Remove quotes from a string.""" + if len(str) > 1: + if str.startswith('"') and str.endswith('"'): + return str[1:-1].replace('\\\\', '\\').replace('\\"', '"') + if str.startswith('<') and str.endswith('>'): + return str[1:-1] + return str + + + +# RFC2231-related functions - parameter encoding and decoding +def decode_rfc2231(s): + """Decode string according to RFC 2231""" + parts = s.split(TICK, 2) + if len(parts) <= 2: + return None, None, s + return parts + + +def encode_rfc2231(s, charset=None, language=None): + """Encode string according to RFC 2231. + + If neither charset nor language is given, then s is returned as-is. If + charset is given but not language, the string is encoded using the empty + string for language. + """ + s = url_quote(s, safe='', encoding=charset or 'ascii') + if charset is None and language is None: + return s + if language is None: + language = '' + return "%s'%s'%s" % (charset, language, s) + + +rfc2231_continuation = re.compile(r'^(?P\w+)\*((?P[0-9]+)\*?)?$', + re.ASCII) + +def decode_params(params): + """Decode parameters list according to RFC 2231. + + params is a sequence of 2-tuples containing (param name, string value). + """ + # Copy params so we don't mess with the original + params = params[:] + new_params = [] + # Map parameter's name to a list of continuations. 
The values are a + # 3-tuple of the continuation number, the string value, and a flag + # specifying whether a particular segment is %-encoded. + rfc2231_params = {} + name, value = params.pop(0) + new_params.append((name, value)) + while params: + name, value = params.pop(0) + if name.endswith('*'): + encoded = True + else: + encoded = False + value = unquote(value) + mo = rfc2231_continuation.match(name) + if mo: + name, num = mo.group('name', 'num') + if num is not None: + num = int(num) + rfc2231_params.setdefault(name, []).append((num, value, encoded)) + else: + new_params.append((name, '"%s"' % quote(value))) + if rfc2231_params: + for name, continuations in rfc2231_params.items(): + value = [] + extended = False + # Sort by number + continuations.sort() + # And now append all values in numerical order, converting + # %-encodings for the encoded segments. If any of the + # continuation names ends in a *, then the entire string, after + # decoding segments and concatenating, must have the charset and + # language specifiers at the beginning of the string. + for num, s, encoded in continuations: + if encoded: + # Decode as "latin-1", so the characters in s directly + # represent the percent-encoded octet values. + # collapse_rfc2231_value treats this as an octet sequence. + s = url_unquote(s, encoding="latin-1") + extended = True + value.append(s) + value = quote(EMPTYSTRING.join(value)) + if extended: + charset, language, value = decode_rfc2231(value) + new_params.append((name, (charset, language, '"%s"' % value))) + else: + new_params.append((name, '"%s"' % value)) + return new_params + +def collapse_rfc2231_value(value, errors='replace', + fallback_charset='us-ascii'): + if not isinstance(value, tuple) or len(value) != 3: + return unquote(value) + # While value comes to us as a unicode string, we need it to be a bytes + # object. We do not want bytes() normal utf-8 decoder, we want a straight + # interpretation of the string as character bytes. 
+ charset, language, text = value + rawbytes = bytes(text, 'raw-unicode-escape') + try: + return str(rawbytes, charset, errors) + except LookupError: + # charset is not a known codec. + return unquote(text) + + +# +# datetime doesn't provide a localtime function yet, so provide one. Code +# adapted from the patch in issue 9527. This may not be perfect, but it is +# better than not having it. +# + +def localtime(dt=None, isdst=-1): + """Return local time as an aware datetime object. + + If called without arguments, return current time. Otherwise *dt* + argument should be a datetime instance, and it is converted to the + local time zone according to the system time zone database. If *dt* is + naive (that is, dt.tzinfo is None), it is assumed to be in local time. + In this case, a positive or zero value for *isdst* causes localtime to + presume initially that summer time (for example, Daylight Saving Time) + is or is not (respectively) in effect for the specified time. A + negative value for *isdst* causes the localtime() function to attempt + to divine whether summer time is in effect for the specified time. + + """ + if dt is None: + return datetime.datetime.now(datetime.timezone.utc).astimezone() + if dt.tzinfo is not None: + return dt.astimezone() + # We have a naive datetime. Convert to a (localtime) timetuple and pass to + # system mktime together with the isdst hint. System mktime will return + # seconds since epoch. + tm = dt.timetuple()[:-1] + (isdst,) + seconds = time.mktime(tm) + localtm = time.localtime(seconds) + try: + delta = datetime.timedelta(seconds=localtm.tm_gmtoff) + tz = datetime.timezone(delta, localtm.tm_zone) + except AttributeError: + # Compute UTC offset and compare with the value implied by tm_isdst. + # If the values match, use the zone name implied by tm_isdst. 
+ delta = dt - datetime.datetime(*time.gmtime(seconds)[:6]) + dst = time.daylight and localtm.tm_isdst > 0 + gmtoff = -(time.altzone if dst else time.timezone) + if delta == datetime.timedelta(seconds=gmtoff): + tz = datetime.timezone(delta, time.tzname[dst]) + else: + tz = datetime.timezone(delta) + return dt.replace(tzinfo=tz) diff --git a/future/standard_library/html/__init__.py b/src/future/backports/html/__init__.py similarity index 99% rename from future/standard_library/html/__init__.py rename to src/future/backports/html/__init__.py index 837afce1..58e133fd 100644 --- a/future/standard_library/html/__init__.py +++ b/src/future/backports/html/__init__.py @@ -25,4 +25,3 @@ def escape(s, quote=True): if quote: return s.translate(_escape_map_full) return s.translate(_escape_map) - diff --git a/future/standard_library/html/entities.py b/src/future/backports/html/entities.py similarity index 99% rename from future/standard_library/html/entities.py rename to src/future/backports/html/entities.py index 84b63ddf..5c73f692 100644 --- a/future/standard_library/html/entities.py +++ b/src/future/backports/html/entities.py @@ -5,7 +5,6 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) -from future import standard_library from future.builtins import * @@ -2513,4 +2512,3 @@ entitydefs[name] = chr(codepoint) del name, codepoint - diff --git a/future/standard_library/html/parser.py b/src/future/backports/html/parser.py similarity index 99% rename from future/standard_library/html/parser.py rename to src/future/backports/html/parser.py index 5b398378..fb652636 100644 --- a/future/standard_library/html/parser.py +++ b/src/future/backports/html/parser.py @@ -12,10 +12,8 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) -from future import standard_library from future.builtins import * - -import _markupbase +from future.backports import _markupbase import re import warnings @@ -523,7 +521,7 @@ def 
replaceEntities(s): except ValueError: return '&#' + s else: - from html.entities import html5 + from future.backports.html.entities import html5 if s in html5: return html5[s] elif s.endswith(';'): @@ -536,4 +534,3 @@ def replaceEntities(s): return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+;|\w{1,32};?))", replaceEntities, s) - diff --git a/future/tests/__init__.py b/src/future/backports/http/__init__.py similarity index 100% rename from future/tests/__init__.py rename to src/future/backports/http/__init__.py diff --git a/future/standard_library/http/client.py b/src/future/backports/http/client.py similarity index 88% rename from future/standard_library/http/client.py rename to src/future/backports/http/client.py index 8d7ef0f4..e663d125 100644 --- a/future/standard_library/http/client.py +++ b/src/future/backports/http/client.py @@ -1,8 +1,9 @@ """HTTP/1.1 client library -A backport of the Python 3.3 module to Python 2.7 using ``future``. +A backport of the Python 3.3 http/client.py module for python-future. 
--------------- + + HTTPConnection goes through a number of "states", which define when a client may legally make another request or fetch the response for a particular @@ -69,24 +70,24 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) -from future.builtins import * -from future.utils import isbytes, istext +from future.builtins import bytes, int, str, super +from future.utils import PY2 -import email.parser -import email.message +from future.backports.email import parser as email_parser +from future.backports.email import message as email_message +from future.backports.misc import create_connection as socket_create_connection import io import os import socket -import collections -# Not yet backported: -# from urllib.parse import urlsplit -# Use the Py2.7 equivalent: -from urlparse import urlsplit - +from future.backports.urllib.parse import urlsplit import warnings -import numbers from array import array +if PY2: + from collections import Iterable +else: + from collections.abc import Iterable + __all__ = ["HTTPResponse", "HTTPConnection", "HTTPException", "NotConnected", "UnknownProtocol", "UnknownTransferEncoding", "UnimplementedFileMode", @@ -226,8 +227,10 @@ # maximal line length when calling readline(). _MAXLINE = 65536 +_MAXHEADERS = 100 -class HTTPMessage(email.message.Message, object): + +class HTTPMessage(email_message.Message): # XXX The only usage of this method is in # http.server.CGIHTTPRequestHandler. Maybe move the code there so # that it doesn't need to be part of the public API. 
The API has @@ -273,16 +276,17 @@ def parse_headers(fp, _class=HTTPMessage): if len(line) > _MAXLINE: raise LineTooLong("header line") headers.append(line) + if len(headers) > _MAXHEADERS: + raise HTTPException("got more than %d headers" % _MAXHEADERS) if line in (b'\r\n', b'\n', b''): break hstring = bytes(b'').join(headers).decode('iso-8859-1') - # Try passing it as bytes to Py2.7 email.parser.parsestr - # which expects a byte-string - return email.parser.Parser(_class=_class).parsestr(hstring) + return email_parser.Parser(_class=_class).parsestr(hstring) + _strict_sentinel = object() -class HTTPResponse(io.RawIOBase, object): +class HTTPResponse(io.RawIOBase): # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. @@ -513,7 +517,7 @@ def read(self, amt=None): if amt is not None: # Amount is given, so call base class version # (which is implemented in terms of self.readinto) - return super(HTTPResponse, self).read(amt) + return bytes(super(HTTPResponse, self).read(amt)) else: # Amount is not given (unbounded read) so we must check self.length # and self.chunked @@ -531,7 +535,7 @@ def read(self, amt=None): raise self.length = 0 self._close_conn() # we read everything - return s + return bytes(s) def readinto(self, b): if self.fp is None: @@ -552,8 +556,15 @@ def readinto(self, b): # we do not use _safe_read() here because this may be a .will_close # connection, and the user is reading more bytes than will be provided # (for example, reading in 1k chunks) - n = self.fp.readinto(b) - if not n: + + if PY2: + data = self.fp.read(len(b)) + n = len(data) + b[:n] = data + else: + n = self.fp.readinto(b) + + if not n and b: # Ideally, we would raise IncompleteRead if the content-length # wasn't satisfied, but it might break compatibility. 
self._close_conn() @@ -604,7 +615,7 @@ def _readall_chunked(self): if chunk_left == 0: break except ValueError: - raise IncompleteRead(b''.join(value)) + raise IncompleteRead(bytes(b'').join(value)) value.append(self._safe_read(chunk_left)) # we read the whole chunk, get another @@ -677,7 +688,7 @@ def _safe_read(self, amt): while amt > 0: chunk = self.fp.read(min(amt, MAXAMOUNT)) if not chunk: - raise IncompleteRead(b''.join(s), amt) + raise IncompleteRead(bytes(b'').join(s), amt) s.append(chunk) amt -= len(chunk) return bytes(b"").join(s) @@ -689,9 +700,19 @@ def _safe_readinto(self, b): while total_bytes < len(b): if MAXAMOUNT < len(mvb): temp_mvb = mvb[0:MAXAMOUNT] - n = self.fp.readinto(temp_mvb) + if PY2: + data = self.fp.read(len(temp_mvb)) + n = len(data) + temp_mvb[:n] = data + else: + n = self.fp.readinto(temp_mvb) else: - n = self.fp.readinto(mvb) + if PY2: + data = self.fp.read(len(mvb)) + n = len(data) + mvb[:n] = data + else: + n = self.fp.readinto(mvb) if not n: raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b)) mvb = mvb[n:] @@ -705,11 +726,11 @@ def getheader(self, name, default=None): if self.headers is None: raise ResponseNotReady() headers = self.headers.get_all(name) or default - if istext(headers) or not hasattr(headers, '__iter__'): + if isinstance(headers, str) or not hasattr(headers, '__iter__'): return headers else: return ', '.join(headers) - + def getheaders(self): """Return list of (header, value) tuples.""" if self.headers is None: @@ -827,7 +848,7 @@ def _tunnel(self): def connect(self): """Connect to the host and port specified in __init__.""" - self.sock = socket.create_connection((self.host,self.port), + self.sock = socket_create_connection((self.host,self.port), self.timeout, self.source_address) if self._tunnel_host: self._tunnel() @@ -881,11 +902,11 @@ def send(self, data): if encode: datablock = datablock.encode("iso-8859-1") self.sock.sendall(datablock) - + return try: self.sock.sendall(data) except TypeError: - if 
isinstance(data, collections.Iterable): + if isinstance(data, Iterable): for d in data: self.sock.sendall(d) else: @@ -911,7 +932,7 @@ def _send_output(self, message_body=None): # If msg and message_body are sent in a single send() call, # it will avoid performance problems caused by the interaction # between delayed ack and the Nagle algorithm. - if isbytes(message_body): + if isinstance(message_body, bytes): msg += message_body message_body = None self.send(msg) @@ -1005,7 +1026,7 @@ def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0): # when used as Host header if self.host.find(':') >= 0: - host_enc = b'[' + host_enc + b']' + host_enc = bytes(b'[' + host_enc + b']') if self.port == self.default_port: self.putheader('Host', host_enc) @@ -1050,10 +1071,10 @@ def putheader(self, header, *values): for i, one_value in enumerate(values): if hasattr(one_value, 'encode'): values[i] = one_value.encode('latin-1') - elif isinstance(one_value, numbers.Integral): + elif isinstance(one_value, int): values[i] = str(one_value).encode('ascii') value = bytes(b'\r\n\t').join(values) - header = header + b': ' + value + header = header + bytes(b': ') + value self._output(header) def endheaders(self, message_body=None): @@ -1107,7 +1128,7 @@ def _send_request(self, method, url, body, headers): self._set_content_length(body) for hdr, value in headers.items(): self.putheader(hdr, value) - if istext(body): + if isinstance(body, str): # RFC 2616 Section 3.7.1 says that text default has a # default charset of iso-8859-1. body = body.encode('iso-8859-1') @@ -1170,38 +1191,97 @@ class the response_class variable. try: import ssl + from ssl import SSLContext except ImportError: pass else: - ###################################### - # We use the old HTTPSConnection class from Py2.7, because ssl.SSLContext - # doesn't exist in the Py2.7 stdlib class HTTPSConnection(HTTPConnection): "This class allows communication via SSL." 
default_port = HTTPS_PORT + # XXX Should key_file and cert_file be deprecated in favour of context? + def __init__(self, host, port=None, key_file=None, cert_file=None, - strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - source_address=None): - HTTPConnection.__init__(self, host, port, strict, timeout, - source_address) + strict=_strict_sentinel, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None, **_3to2kwargs): + if 'check_hostname' in _3to2kwargs: check_hostname = _3to2kwargs['check_hostname']; del _3to2kwargs['check_hostname'] + else: check_hostname = None + if 'context' in _3to2kwargs: context = _3to2kwargs['context']; del _3to2kwargs['context'] + else: context = None + super(HTTPSConnection, self).__init__(host, port, strict, timeout, + source_address) self.key_file = key_file self.cert_file = cert_file + if context is None: + # Some reasonable defaults + context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) + context.options |= ssl.OP_NO_SSLv2 + will_verify = context.verify_mode != ssl.CERT_NONE + if check_hostname is None: + check_hostname = will_verify + elif check_hostname and not will_verify: + raise ValueError("check_hostname needs a SSL context with " + "either CERT_OPTIONAL or CERT_REQUIRED") + if key_file or cert_file: + context.load_cert_chain(cert_file, key_file) + self._context = context + self._check_hostname = check_hostname def connect(self): "Connect to a host on a given (SSL) port." 
- sock = socket.create_connection((self.host, self.port), + sock = socket_create_connection((self.host, self.port), self.timeout, self.source_address) + if self._tunnel_host: self.sock = sock self._tunnel() - self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file) + + server_hostname = self.host if ssl.HAS_SNI else None + self.sock = self._context.wrap_socket(sock, + server_hostname=server_hostname) + try: + if self._check_hostname: + ssl.match_hostname(self.sock.getpeercert(), self.host) + except Exception: + self.sock.shutdown(socket.SHUT_RDWR) + self.sock.close() + raise __all__.append("HTTPSConnection") + # ###################################### + # # We use the old HTTPSConnection class from Py2.7, because ssl.SSLContext + # # doesn't exist in the Py2.7 stdlib + # class HTTPSConnection(HTTPConnection): + # "This class allows communication via SSL." + + # default_port = HTTPS_PORT + + # def __init__(self, host, port=None, key_file=None, cert_file=None, + # strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + # source_address=None): + # HTTPConnection.__init__(self, host, port, strict, timeout, + # source_address) + # self.key_file = key_file + # self.cert_file = cert_file + + # def connect(self): + # "Connect to a host on a given (SSL) port." + + # sock = socket_create_connection((self.host, self.port), + # self.timeout, self.source_address) + # if self._tunnel_host: + # self.sock = sock + # self._tunnel() + # self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file) + + # __all__.append("HTTPSConnection") + # ###################################### + + class HTTPException(Exception): # Subclasses that define an __init__ must call Exception.__init__ # or define self.args. Otherwise, str() will fail. 
diff --git a/future/standard_library/http/cookiejar.py b/src/future/backports/http/cookiejar.py similarity index 82% rename from future/standard_library/http/cookiejar.py rename to src/future/backports/http/cookiejar.py index 3981ff1f..a39242c0 100644 --- a/future/standard_library/http/cookiejar.py +++ b/src/future/backports/http/cookiejar.py @@ -1,7 +1,7 @@ r"""HTTP cookie handling for web clients. -This is based on the Py3.3 ``http.cookiejar`` module and the Py2.7 -``cookielib`` module. +This is a backport of the Py3.3 ``http.cookiejar`` module for +python-future. This module has (now fairly distant) origins in Gisle Aas' Perl module HTTP::Cookies, from the libwww-perl library. @@ -27,9 +27,13 @@ MSIECookieJar """ -from __future__ import (absolute_import, division) # , unicode_literals) -from future import standard_library -# from future.builtins import * + +from __future__ import unicode_literals +from __future__ import print_function +from __future__ import division +from __future__ import absolute_import +from future.builtins import filter, int, map, open, str +from future.utils import as_native_str, PY2 __all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy', 'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar'] @@ -37,15 +41,15 @@ import copy import datetime import re +if PY2: + re.ASCII = 0 import time -import urlparse, urllib +from future.backports.urllib.parse import urlparse, urlsplit, quote +from future.backports.http.client import HTTP_PORT try: import threading as _threading except ImportError: import dummy_threading as _threading -# Instead of this: import httplib -# Import this new-style one: -import http.client # only for the default HTTP port from calendar import timegm debug = False # set to True to enable debugging via the logging module @@ -61,7 +65,7 @@ def _debug(*args): return logger.debug(*args) -DEFAULT_HTTP_PORT = str(http.client.HTTP_PORT) +DEFAULT_HTTP_PORT = str(HTTP_PORT) MISSING_FILENAME_TEXT = ("a 
filename was not supplied (nor was the CookieJar " "instance initialised with one)") @@ -135,7 +139,7 @@ def time2netscape(t=None): UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None} -TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$") +TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII) def offset_from_tz_string(tz): offset = None if tz in UTC_ZONES: @@ -205,9 +209,9 @@ def _str2time(day, mon, yr, hr, min, sec, tz): STRICT_DATE_RE = re.compile( r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) " - "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$") + "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII) WEEKDAY_RE = re.compile( - r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I) + r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII) LOOSE_HTTP_DATE_RE = re.compile( r"""^ (\d\d?) # day @@ -221,10 +225,14 @@ def _str2time(day, mon, yr, hr, min, sec, tz): (?::(\d\d))? # optional seconds )? # optional clock \s* - ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone + (?: + ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+) # timezone + \s* + )? + (?: + \(\w+\) # ASCII representation of timezone in parens. \s* - (?:\(\w+\))? # ASCII representation of timezone in parens. - \s*$""", re.X) + )?$""", re.X | re.ASCII) def http2time(text): """Returns time in seconds since epoch of time represented by a string. @@ -294,9 +302,11 @@ def http2time(text): (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional) )? # optional clock \s* - ([-+]?\d\d?:?(:?\d\d)? - |Z|z)? # timezone (Z is "zero meridian", i.e. GMT) - \s*$""", re.X) + (?: + ([-+]?\d\d?:?(:?\d\d)? + |Z|z) # timezone (Z is "zero meridian", i.e. GMT) + \s* + )?$""", re.X | re. 
ASCII) def iso2time(text): """ As for http2time, but parses the ISO 8601 formats: @@ -384,7 +394,7 @@ def split_header_words(header_values): [[('Basic', None), ('realm', '"foobar"')]] """ - assert not isinstance(header_values, basestring) + assert not isinstance(header_values, str) result = [] for text in header_values: orig_text = text @@ -509,7 +519,7 @@ def parse_ns_headers(ns_headers): return result -IPV4_RE = re.compile(r"\.\d+$") +IPV4_RE = re.compile(r"\.\d+$", re.ASCII) def is_HDN(text): """Return True if text is a host domain name.""" # XXX @@ -594,7 +604,7 @@ def user_domain_match(A, B): return True return False -cut_port_re = re.compile(r":\d+$") +cut_port_re = re.compile(r":\d+$", re.ASCII) def request_host(request): """Return request-host, as defined by RFC 2965. @@ -603,7 +613,7 @@ def request_host(request): """ url = request.get_full_url() - host = urllib.parse.urlparse(url)[1] + host = urlparse(url)[1] if host == "": host = request.get_header("Host", "") @@ -625,7 +635,7 @@ def eff_request_host(request): def request_path(request): """Path component of request-URI, as defined by RFC 2965.""" url = request.get_full_url() - parts = urllib.parse.urlsplit(url) + parts = urlsplit(url) path = escape_path(parts.path) if not path.startswith("/"): # fix bad RFC 2396 absoluteURI @@ -633,7 +643,7 @@ def request_path(request): return path def request_port(request): - host = request.get_host() + host = request.host i = host.find(':') if i >= 0: port = host[i+1:] @@ -662,9 +672,7 @@ def escape_path(path): # And here, kind of: draft-fielding-uri-rfc2396bis-03 # (And in draft IRI specification: draft-duerst-iri-05) # (And here, for new URI schemes: RFC 2718) - if isinstance(path, unicode): - path = path.encode("utf-8") - path = urllib.quote(path, HTTP_PATH_SAFE) + path = quote(path, HTTP_PATH_SAFE) path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path) return path @@ -803,6 +811,7 @@ def __str__(self): namevalue = self.name return "" % (namevalue, limit) + 
@as_native_str() def __repr__(self): args = [] for name in ("version", "name", "value", @@ -812,7 +821,12 @@ def __repr__(self): "secure", "expires", "discard", "comment", "comment_url", ): attr = getattr(self, name) - args.append("%s=%s" % (name, repr(attr))) + ### Python-Future: + # Avoid u'...' prefixes for unicode strings: + if isinstance(attr, str): + attr = str(attr) + ### + args.append(str("%s=%s") % (name, repr(attr))) args.append("rest=%s" % repr(self._rest)) args.append("rfc2109=%s" % repr(self.rfc2109)) return "Cookie(%s)" % ", ".join(args) @@ -959,7 +973,7 @@ def set_ok_version(self, cookie, request): return True def set_ok_verifiability(self, cookie, request): - if request.is_unverifiable() and is_third_party(request): + if request.unverifiable and is_third_party(request): if cookie.version > 0 and self.strict_rfc2965_unverifiable: _debug(" third-party RFC 2965 cookie during " "unverifiable transaction") @@ -1098,7 +1112,7 @@ def return_ok_version(self, cookie, request): return True def return_ok_verifiability(self, cookie, request): - if request.is_unverifiable() and is_third_party(request): + if request.unverifiable and is_third_party(request): if cookie.version > 0 and self.strict_rfc2965_unverifiable: _debug(" third-party RFC 2965 cookie during unverifiable " "transaction") @@ -1110,7 +1124,7 @@ def return_ok_verifiability(self, cookie, request): return True def return_ok_secure(self, cookie, request): - if cookie.secure and request.get_type() != "https": + if cookie.secure and request.type != "https": _debug(" secure cookie with non-secure request") return False return True @@ -1217,8 +1231,7 @@ class CookieJar(object): """Collection of HTTP cookies. You may not need to know about this class: try - urllib2.build_opener(HTTPCookieProcessor).open(url). - + urllib.request.build_opener(HTTPCookieProcessor).open(url). 
""" non_word_re = re.compile(r"\W") @@ -1227,7 +1240,7 @@ class CookieJar(object): domain_re = re.compile(r"[^.]*") dots_re = re.compile(r"^\.+") - magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)") + magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)", re.ASCII) def __init__(self, policy=None): if policy is None: @@ -1325,7 +1338,7 @@ def _cookie_attrs(self, cookies): return attrs def add_cookie_header(self, request): - """Add correct Cookie: header to request (urllib2.Request object). + """Add correct Cookie: header to request (urllib.request.Request object). The Cookie2 header is also added unless policy.hide_cookie2 is true. @@ -1571,8 +1584,8 @@ def make_cookies(self, response, request): """Return sequence of Cookie objects extracted from response object.""" # get cookie-attributes for RFC 2965 and Netscape protocols headers = response.info() - rfc2965_hdrs = headers.getheaders("Set-Cookie2") # FIXME? - ns_hdrs = headers.getheaders("Set-Cookie") # FIXME? + rfc2965_hdrs = headers.get_all("Set-Cookie2", []) + ns_hdrs = headers.get_all("Set-Cookie", []) rfc2965 = self._policy.rfc2965 netscape = self._policy.netscape @@ -1731,6 +1744,7 @@ def __len__(self): for cookie in self: i = i + 1 return i + @as_native_str() def __repr__(self): r = [] for cookie in self: r.append(repr(cookie)) @@ -1805,5 +1819,298 @@ def revert(self, filename=None, finally: self._cookies_lock.release() -from _LWPCookieJar import LWPCookieJar, lwp_cookie_str -from _MozillaCookieJar import MozillaCookieJar + +def lwp_cookie_str(cookie): + """Return string representation of Cookie in an the LWP cookie file format. + + Actually, the format is extended a bit -- see module docstring. 
+ + """ + h = [(cookie.name, cookie.value), + ("path", cookie.path), + ("domain", cookie.domain)] + if cookie.port is not None: h.append(("port", cookie.port)) + if cookie.path_specified: h.append(("path_spec", None)) + if cookie.port_specified: h.append(("port_spec", None)) + if cookie.domain_initial_dot: h.append(("domain_dot", None)) + if cookie.secure: h.append(("secure", None)) + if cookie.expires: h.append(("expires", + time2isoz(float(cookie.expires)))) + if cookie.discard: h.append(("discard", None)) + if cookie.comment: h.append(("comment", cookie.comment)) + if cookie.comment_url: h.append(("commenturl", cookie.comment_url)) + + keys = sorted(cookie._rest.keys()) + for k in keys: + h.append((k, str(cookie._rest[k]))) + + h.append(("version", str(cookie.version))) + + return join_header_words([h]) + +class LWPCookieJar(FileCookieJar): + """ + The LWPCookieJar saves a sequence of "Set-Cookie3" lines. + "Set-Cookie3" is the format used by the libwww-perl library, not known + to be compatible with any browser, but which is easy to read and + doesn't lose information about RFC 2965 cookies. + + Additional methods + + as_lwp_str(ignore_discard=True, ignore_expired=True) + + """ + + def as_lwp_str(self, ignore_discard=True, ignore_expires=True): + """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers. 
+ + ignore_discard and ignore_expires: see docstring for FileCookieJar.save + + """ + now = time.time() + r = [] + for cookie in self: + if not ignore_discard and cookie.discard: + continue + if not ignore_expires and cookie.is_expired(now): + continue + r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie)) + return "\n".join(r+[""]) + + def save(self, filename=None, ignore_discard=False, ignore_expires=False): + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + f = open(filename, "w") + try: + # There really isn't an LWP Cookies 2.0 format, but this indicates + # that there is extra information in here (domain_dot and + # port_spec) while still being compatible with libwww-perl, I hope. + f.write("#LWP-Cookies-2.0\n") + f.write(self.as_lwp_str(ignore_discard, ignore_expires)) + finally: + f.close() + + def _really_load(self, f, filename, ignore_discard, ignore_expires): + magic = f.readline() + if not self.magic_re.search(magic): + msg = ("%r does not look like a Set-Cookie3 (LWP) format " + "file" % filename) + raise LoadError(msg) + + now = time.time() + + header = "Set-Cookie3:" + boolean_attrs = ("port_spec", "path_spec", "domain_dot", + "secure", "discard") + value_attrs = ("version", + "port", "path", "domain", + "expires", + "comment", "commenturl") + + try: + while 1: + line = f.readline() + if line == "": break + if not line.startswith(header): + continue + line = line[len(header):].strip() + + for data in split_header_words([line]): + name, value = data[0] + standard = {} + rest = {} + for k in boolean_attrs: + standard[k] = False + for k, v in data[1:]: + if k is not None: + lc = k.lower() + else: + lc = None + # don't lose case distinction for unknown fields + if (lc in value_attrs) or (lc in boolean_attrs): + k = lc + if k in boolean_attrs: + if v is None: v = True + standard[k] = v + elif k in value_attrs: + standard[k] = v + else: + rest[k] = v + + h = standard.get + 
expires = h("expires") + discard = h("discard") + if expires is not None: + expires = iso2time(expires) + if expires is None: + discard = True + domain = h("domain") + domain_specified = domain.startswith(".") + c = Cookie(h("version"), name, value, + h("port"), h("port_spec"), + domain, domain_specified, h("domain_dot"), + h("path"), h("path_spec"), + h("secure"), + expires, + discard, + h("comment"), + h("commenturl"), + rest) + if not ignore_discard and c.discard: + continue + if not ignore_expires and c.is_expired(now): + continue + self.set_cookie(c) + + except IOError: + raise + except Exception: + _warn_unhandled_exception() + raise LoadError("invalid Set-Cookie3 format file %r: %r" % + (filename, line)) + + +class MozillaCookieJar(FileCookieJar): + """ + + WARNING: you may want to backup your browser's cookies file if you use + this class to save cookies. I *think* it works, but there have been + bugs in the past! + + This class differs from CookieJar only in the format it uses to save and + load cookies to and from a file. This class uses the Mozilla/Netscape + `cookies.txt' format. lynx uses this file format, too. + + Don't expect cookies saved while the browser is running to be noticed by + the browser (in fact, Mozilla on unix will overwrite your saved cookies if + you change them on disk while it's running; on Windows, you probably can't + save at all while the browser is running). + + Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to + Netscape cookies on saving. + + In particular, the cookie version and port number information is lost, + together with information about whether or not Path, Port and Discard were + specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the + domain as set in the HTTP header started with a dot (yes, I'm aware some + domains in Netscape files start with a dot and some don't -- trust me, you + really don't want to know any more about this). 
+ + Note that though Mozilla and Netscape use the same format, they use + slightly different headers. The class saves cookies using the Netscape + header by default (Mozilla can cope with that). + + """ + magic_re = re.compile("#( Netscape)? HTTP Cookie File") + header = """\ +# Netscape HTTP Cookie File +# http://www.netscape.com/newsref/std/cookie_spec.html +# This is a generated file! Do not edit. + +""" + + def _really_load(self, f, filename, ignore_discard, ignore_expires): + now = time.time() + + magic = f.readline() + if not self.magic_re.search(magic): + f.close() + raise LoadError( + "%r does not look like a Netscape format cookies file" % + filename) + + try: + while 1: + line = f.readline() + if line == "": break + + # last field may be absent, so keep any trailing tab + if line.endswith("\n"): line = line[:-1] + + # skip comments and blank lines XXX what is $ for? + if (line.strip().startswith(("#", "$")) or + line.strip() == ""): + continue + + domain, domain_specified, path, secure, expires, name, value = \ + line.split("\t") + secure = (secure == "TRUE") + domain_specified = (domain_specified == "TRUE") + if name == "": + # cookies.txt regards 'Set-Cookie: foo' as a cookie + # with no name, whereas http.cookiejar regards it as a + # cookie with no value. 
+ name = value + value = None + + initial_dot = domain.startswith(".") + assert domain_specified == initial_dot + + discard = False + if expires == "": + expires = None + discard = True + + # assume path_specified is false + c = Cookie(0, name, value, + None, False, + domain, domain_specified, initial_dot, + path, False, + secure, + expires, + discard, + None, + None, + {}) + if not ignore_discard and c.discard: + continue + if not ignore_expires and c.is_expired(now): + continue + self.set_cookie(c) + + except IOError: + raise + except Exception: + _warn_unhandled_exception() + raise LoadError("invalid Netscape format cookies file %r: %r" % + (filename, line)) + + def save(self, filename=None, ignore_discard=False, ignore_expires=False): + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + f = open(filename, "w") + try: + f.write(self.header) + now = time.time() + for cookie in self: + if not ignore_discard and cookie.discard: + continue + if not ignore_expires and cookie.is_expired(now): + continue + if cookie.secure: secure = "TRUE" + else: secure = "FALSE" + if cookie.domain.startswith("."): initial_dot = "TRUE" + else: initial_dot = "FALSE" + if cookie.expires is not None: + expires = str(cookie.expires) + else: + expires = "" + if cookie.value is None: + # cookies.txt regards 'Set-Cookie: foo' as a cookie + # with no name, whereas http.cookiejar regards it as a + # cookie with no value. 
+ name = "" + value = cookie.name + else: + name = cookie.name + value = cookie.value + f.write( + "\t".join([cookie.domain, initial_dot, cookie.path, + secure, expires, name, value])+ + "\n") + finally: + f.close() diff --git a/src/future/backports/http/cookies.py b/src/future/backports/http/cookies.py new file mode 100644 index 00000000..8bb61e22 --- /dev/null +++ b/src/future/backports/http/cookies.py @@ -0,0 +1,598 @@ +#### +# Copyright 2000 by Timothy O'Malley +# +# All Rights Reserved +# +# Permission to use, copy, modify, and distribute this software +# and its documentation for any purpose and without fee is hereby +# granted, provided that the above copyright notice appear in all +# copies and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Timothy O'Malley not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS +# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR +# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +# PERFORMANCE OF THIS SOFTWARE. +# +#### +# +# Id: Cookie.py,v 2.29 2000/08/23 05:28:49 timo Exp +# by Timothy O'Malley +# +# Cookie.py is a Python module for the handling of HTTP +# cookies as a Python dictionary. See RFC 2109 for more +# information on cookies. +# +# The original idea to treat Cookies as a dictionary came from +# Dave Mitchell (davem@magnet.com) in 1995, when he released the +# first version of nscookie.py. +# +#### + +r""" +http.cookies module ported to python-future from Py3.3 + +Here's a sample session to show how to use this module. 
+At the moment, this is the only documentation. + +The Basics +---------- + +Importing is easy... + + >>> from http import cookies + +Most of the time you start by creating a cookie. + + >>> C = cookies.SimpleCookie() + +Once you've created your Cookie, you can add values just as if it were +a dictionary. + + >>> C = cookies.SimpleCookie() + >>> C["fig"] = "newton" + >>> C["sugar"] = "wafer" + >>> C.output() + 'Set-Cookie: fig=newton\r\nSet-Cookie: sugar=wafer' + +Notice that the printable representation of a Cookie is the +appropriate format for a Set-Cookie: header. This is the +default behavior. You can change the header and printed +attributes by using the .output() function + + >>> C = cookies.SimpleCookie() + >>> C["rocky"] = "road" + >>> C["rocky"]["path"] = "/cookie" + >>> print(C.output(header="Cookie:")) + Cookie: rocky=road; Path=/cookie + >>> print(C.output(attrs=[], header="Cookie:")) + Cookie: rocky=road + +The load() method of a Cookie extracts cookies from a string. In a +CGI script, you would use this method to extract the cookies from the +HTTP_COOKIE environment variable. + + >>> C = cookies.SimpleCookie() + >>> C.load("chips=ahoy; vienna=finger") + >>> C.output() + 'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger' + +The load() method is darn-tootin smart about identifying cookies +within a string. Escaped quotation marks, nested semicolons, and other +such trickeries do not confuse it. + + >>> C = cookies.SimpleCookie() + >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";') + >>> print(C) + Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;" + +Each element of the Cookie also supports all of the RFC 2109 +Cookie attributes. Here's an example which sets the Path +attribute. 
+ + >>> C = cookies.SimpleCookie() + >>> C["oreo"] = "doublestuff" + >>> C["oreo"]["path"] = "/" + >>> print(C) + Set-Cookie: oreo=doublestuff; Path=/ + +Each dictionary element has a 'value' attribute, which gives you +back the value associated with the key. + + >>> C = cookies.SimpleCookie() + >>> C["twix"] = "none for you" + >>> C["twix"].value + 'none for you' + +The SimpleCookie expects that all values should be standard strings. +Just to be sure, SimpleCookie invokes the str() builtin to convert +the value to a string, when the values are set dictionary-style. + + >>> C = cookies.SimpleCookie() + >>> C["number"] = 7 + >>> C["string"] = "seven" + >>> C["number"].value + '7' + >>> C["string"].value + 'seven' + >>> C.output() + 'Set-Cookie: number=7\r\nSet-Cookie: string=seven' + +Finis. +""" +from __future__ import unicode_literals +from __future__ import print_function +from __future__ import division +from __future__ import absolute_import +from future.builtins import chr, dict, int, str +from future.utils import PY2, as_native_str + +# +# Import our required modules +# +import re +if PY2: + re.ASCII = 0 # for py2 compatibility +import string + +__all__ = ["CookieError", "BaseCookie", "SimpleCookie"] + +_nulljoin = ''.join +_semispacejoin = '; '.join +_spacejoin = ' '.join + +# +# Define an exception visible to External modules +# +class CookieError(Exception): + pass + + +# These quoting routines conform to the RFC2109 specification, which in +# turn references the character definitions from RFC2068. They provide +# a two-way quoting algorithm. Any non-text character is translated +# into a 4 character sequence: a forward-slash followed by the +# three-digit octal equivalent of the character. Any '\' or '"' is +# quoted with a preceeding '\' slash. +# +# These are taken from RFC2068 and RFC2109. 
+# _LegalChars is the list of chars which don't require "'s +# _Translator hash-table for fast quoting +# +_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~:" +_Translator = { + '\000' : '\\000', '\001' : '\\001', '\002' : '\\002', + '\003' : '\\003', '\004' : '\\004', '\005' : '\\005', + '\006' : '\\006', '\007' : '\\007', '\010' : '\\010', + '\011' : '\\011', '\012' : '\\012', '\013' : '\\013', + '\014' : '\\014', '\015' : '\\015', '\016' : '\\016', + '\017' : '\\017', '\020' : '\\020', '\021' : '\\021', + '\022' : '\\022', '\023' : '\\023', '\024' : '\\024', + '\025' : '\\025', '\026' : '\\026', '\027' : '\\027', + '\030' : '\\030', '\031' : '\\031', '\032' : '\\032', + '\033' : '\\033', '\034' : '\\034', '\035' : '\\035', + '\036' : '\\036', '\037' : '\\037', + + # Because of the way browsers really handle cookies (as opposed + # to what the RFC says) we also encode , and ; + + ',' : '\\054', ';' : '\\073', + + '"' : '\\"', '\\' : '\\\\', + + '\177' : '\\177', '\200' : '\\200', '\201' : '\\201', + '\202' : '\\202', '\203' : '\\203', '\204' : '\\204', + '\205' : '\\205', '\206' : '\\206', '\207' : '\\207', + '\210' : '\\210', '\211' : '\\211', '\212' : '\\212', + '\213' : '\\213', '\214' : '\\214', '\215' : '\\215', + '\216' : '\\216', '\217' : '\\217', '\220' : '\\220', + '\221' : '\\221', '\222' : '\\222', '\223' : '\\223', + '\224' : '\\224', '\225' : '\\225', '\226' : '\\226', + '\227' : '\\227', '\230' : '\\230', '\231' : '\\231', + '\232' : '\\232', '\233' : '\\233', '\234' : '\\234', + '\235' : '\\235', '\236' : '\\236', '\237' : '\\237', + '\240' : '\\240', '\241' : '\\241', '\242' : '\\242', + '\243' : '\\243', '\244' : '\\244', '\245' : '\\245', + '\246' : '\\246', '\247' : '\\247', '\250' : '\\250', + '\251' : '\\251', '\252' : '\\252', '\253' : '\\253', + '\254' : '\\254', '\255' : '\\255', '\256' : '\\256', + '\257' : '\\257', '\260' : '\\260', '\261' : '\\261', + '\262' : '\\262', '\263' : '\\263', '\264' : '\\264', + '\265' : 
'\\265', '\266' : '\\266', '\267' : '\\267', + '\270' : '\\270', '\271' : '\\271', '\272' : '\\272', + '\273' : '\\273', '\274' : '\\274', '\275' : '\\275', + '\276' : '\\276', '\277' : '\\277', '\300' : '\\300', + '\301' : '\\301', '\302' : '\\302', '\303' : '\\303', + '\304' : '\\304', '\305' : '\\305', '\306' : '\\306', + '\307' : '\\307', '\310' : '\\310', '\311' : '\\311', + '\312' : '\\312', '\313' : '\\313', '\314' : '\\314', + '\315' : '\\315', '\316' : '\\316', '\317' : '\\317', + '\320' : '\\320', '\321' : '\\321', '\322' : '\\322', + '\323' : '\\323', '\324' : '\\324', '\325' : '\\325', + '\326' : '\\326', '\327' : '\\327', '\330' : '\\330', + '\331' : '\\331', '\332' : '\\332', '\333' : '\\333', + '\334' : '\\334', '\335' : '\\335', '\336' : '\\336', + '\337' : '\\337', '\340' : '\\340', '\341' : '\\341', + '\342' : '\\342', '\343' : '\\343', '\344' : '\\344', + '\345' : '\\345', '\346' : '\\346', '\347' : '\\347', + '\350' : '\\350', '\351' : '\\351', '\352' : '\\352', + '\353' : '\\353', '\354' : '\\354', '\355' : '\\355', + '\356' : '\\356', '\357' : '\\357', '\360' : '\\360', + '\361' : '\\361', '\362' : '\\362', '\363' : '\\363', + '\364' : '\\364', '\365' : '\\365', '\366' : '\\366', + '\367' : '\\367', '\370' : '\\370', '\371' : '\\371', + '\372' : '\\372', '\373' : '\\373', '\374' : '\\374', + '\375' : '\\375', '\376' : '\\376', '\377' : '\\377' + } + +def _quote(str, LegalChars=_LegalChars): + r"""Quote a string for use in a cookie header. + + If the string does not need to be double-quoted, then just return the + string. Otherwise, surround the string in doublequotes and quote + (with a \) special characters. + """ + if all(c in LegalChars for c in str): + return str + else: + return '"' + _nulljoin(_Translator.get(s, s) for s in str) + '"' + + +_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]") +_QuotePatt = re.compile(r"[\\].") + +def _unquote(mystr): + # If there aren't any doublequotes, + # then there can't be any special characters. 
See RFC 2109. + if len(mystr) < 2: + return mystr + if mystr[0] != '"' or mystr[-1] != '"': + return mystr + + # We have to assume that we must decode this string. + # Down to work. + + # Remove the "s + mystr = mystr[1:-1] + + # Check for special sequences. Examples: + # \012 --> \n + # \" --> " + # + i = 0 + n = len(mystr) + res = [] + while 0 <= i < n: + o_match = _OctalPatt.search(mystr, i) + q_match = _QuotePatt.search(mystr, i) + if not o_match and not q_match: # Neither matched + res.append(mystr[i:]) + break + # else: + j = k = -1 + if o_match: + j = o_match.start(0) + if q_match: + k = q_match.start(0) + if q_match and (not o_match or k < j): # QuotePatt matched + res.append(mystr[i:k]) + res.append(mystr[k+1]) + i = k + 2 + else: # OctalPatt matched + res.append(mystr[i:j]) + res.append(chr(int(mystr[j+1:j+4], 8))) + i = j + 4 + return _nulljoin(res) + +# The _getdate() routine is used to set the expiration time in the cookie's HTTP +# header. By default, _getdate() returns the current time in the appropriate +# "expires" format for a Set-Cookie header. The one optional argument is an +# offset from now, in seconds. For example, an offset of -3600 means "one hour +# ago". The offset may be a floating point number. +# + +_weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] + +_monthname = [None, + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + +def _getdate(future=0, weekdayname=_weekdayname, monthname=_monthname): + from time import gmtime, time + now = time() + year, month, day, hh, mm, ss, wd, y, z = gmtime(now + future) + return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % \ + (weekdayname[wd], day, monthname[month], year, hh, mm, ss) + + +class Morsel(dict): + """A class to hold ONE (key, value) pair. + + In a cookie, each such pair may have several attributes, so this class is + used to keep the attributes associated with the appropriate key,value pair. 
+ This class also includes a coded_value attribute, which is used to hold + the network representation of the value. This is most useful when Python + objects are pickled for network transit. + """ + # RFC 2109 lists these attributes as reserved: + # path comment domain + # max-age secure version + # + # For historical reasons, these attributes are also reserved: + # expires + # + # This is an extension from Microsoft: + # httponly + # + # This dictionary provides a mapping from the lowercase + # variant on the left to the appropriate traditional + # formatting on the right. + _reserved = { + "expires" : "expires", + "path" : "Path", + "comment" : "Comment", + "domain" : "Domain", + "max-age" : "Max-Age", + "secure" : "secure", + "httponly" : "httponly", + "version" : "Version", + } + + _flags = set(['secure', 'httponly']) + + def __init__(self): + # Set defaults + self.key = self.value = self.coded_value = None + + # Set default attributes + for key in self._reserved: + dict.__setitem__(self, key, "") + + def __setitem__(self, K, V): + K = K.lower() + if not K in self._reserved: + raise CookieError("Invalid Attribute %s" % K) + dict.__setitem__(self, K, V) + + def isReservedKey(self, K): + return K.lower() in self._reserved + + def set(self, key, val, coded_val, LegalChars=_LegalChars): + # First we verify that the key isn't a reserved word + # Second we make sure it only contains legal characters + if key.lower() in self._reserved: + raise CookieError("Attempt to set a reserved key: %s" % key) + if any(c not in LegalChars for c in key): + raise CookieError("Illegal key value: %s" % key) + + # It's a good key, so save it. 
+ self.key = key + self.value = val + self.coded_value = coded_val + + def output(self, attrs=None, header="Set-Cookie:"): + return "%s %s" % (header, self.OutputString(attrs)) + + __str__ = output + + @as_native_str() + def __repr__(self): + if PY2 and isinstance(self.value, unicode): + val = str(self.value) # make it a newstr to remove the u prefix + else: + val = self.value + return '<%s: %s=%s>' % (self.__class__.__name__, + str(self.key), repr(val)) + + def js_output(self, attrs=None): + # Print javascript + return """ + + """ % (self.OutputString(attrs).replace('"', r'\"')) + + def OutputString(self, attrs=None): + # Build up our result + # + result = [] + append = result.append + + # First, the key=value pair + append("%s=%s" % (self.key, self.coded_value)) + + # Now add any defined attributes + if attrs is None: + attrs = self._reserved + items = sorted(self.items()) + for key, value in items: + if value == "": + continue + if key not in attrs: + continue + if key == "expires" and isinstance(value, int): + append("%s=%s" % (self._reserved[key], _getdate(value))) + elif key == "max-age" and isinstance(value, int): + append("%s=%d" % (self._reserved[key], value)) + elif key == "secure": + append(str(self._reserved[key])) + elif key == "httponly": + append(str(self._reserved[key])) + else: + append("%s=%s" % (self._reserved[key], value)) + + # Return the result + return _semispacejoin(result) + + +# +# Pattern for finding cookie +# +# This used to be strict parsing based on the RFC2109 and RFC2068 +# specifications. I have since discovered that MSIE 3.0x doesn't +# follow the character rules outlined in those specs. As a +# result, the parsing rules here are less strict. +# + +_LegalCharsPatt = r"[\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=]" +_CookiePattern = re.compile(r""" + (?x) # This is a verbose pattern + (?P # Start of group 'key' + """ + _LegalCharsPatt + r"""+? 
# Any word of at least one letter + ) # End of group 'key' + ( # Optional group: there may not be a value. + \s*=\s* # Equal Sign + (?P # Start of group 'val' + "(?:[^\\"]|\\.)*" # Any doublequoted string + | # or + \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr + | # or + """ + _LegalCharsPatt + r"""* # Any word or empty string + ) # End of group 'val' + )? # End of optional value group + \s* # Any number of spaces. + (\s+|;|$) # Ending either at space, semicolon, or EOS. + """, re.ASCII) # May be removed if safe. + + +# At long last, here is the cookie class. Using this class is almost just like +# using a dictionary. See this module's docstring for example usage. +# +class BaseCookie(dict): + """A container class for a set of Morsels.""" + + def value_decode(self, val): + """real_value, coded_value = value_decode(STRING) + Called prior to setting a cookie's value from the network + representation. The VALUE is the value read from HTTP + header. + Override this function to modify the behavior of cookies. + """ + return val, val + + def value_encode(self, val): + """real_value, coded_value = value_encode(VALUE) + Called prior to setting a cookie's value from the dictionary + representation. The VALUE is the value being assigned. + Override this function to modify the behavior of cookies. 
+ """ + strval = str(val) + return strval, strval + + def __init__(self, input=None): + if input: + self.load(input) + + def __set(self, key, real_value, coded_value): + """Private method for setting a cookie's value""" + M = self.get(key, Morsel()) + M.set(key, real_value, coded_value) + dict.__setitem__(self, key, M) + + def __setitem__(self, key, value): + """Dictionary style assignment.""" + rval, cval = self.value_encode(value) + self.__set(key, rval, cval) + + def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"): + """Return a string suitable for HTTP.""" + result = [] + items = sorted(self.items()) + for key, value in items: + result.append(value.output(attrs, header)) + return sep.join(result) + + __str__ = output + + @as_native_str() + def __repr__(self): + l = [] + items = sorted(self.items()) + for key, value in items: + if PY2 and isinstance(value.value, unicode): + val = str(value.value) # make it a newstr to remove the u prefix + else: + val = value.value + l.append('%s=%s' % (str(key), repr(val))) + return '<%s: %s>' % (self.__class__.__name__, _spacejoin(l)) + + def js_output(self, attrs=None): + """Return a string suitable for JavaScript.""" + result = [] + items = sorted(self.items()) + for key, value in items: + result.append(value.js_output(attrs)) + return _nulljoin(result) + + def load(self, rawdata): + """Load cookies from a string (presumably HTTP_COOKIE) or + from a dictionary. 
Loading cookies from a dictionary 'd' + is equivalent to calling: + map(Cookie.__setitem__, d.keys(), d.values()) + """ + if isinstance(rawdata, str): + self.__parse_string(rawdata) + else: + # self.update() wouldn't call our custom __setitem__ + for key, value in rawdata.items(): + self[key] = value + return + + def __parse_string(self, mystr, patt=_CookiePattern): + i = 0 # Our starting point + n = len(mystr) # Length of string + M = None # current morsel + + while 0 <= i < n: + # Start looking for a cookie + match = patt.search(mystr, i) + if not match: + # No more cookies + break + + key, value = match.group("key"), match.group("val") + + i = match.end(0) + + # Parse the key, value in case it's metainfo + if key[0] == "$": + # We ignore attributes which pertain to the cookie + # mechanism as a whole. See RFC 2109. + # (Does anyone care?) + if M: + M[key[1:]] = value + elif key.lower() in Morsel._reserved: + if M: + if value is None: + if key.lower() in Morsel._flags: + M[key] = True + else: + M[key] = _unquote(value) + elif value is not None: + rval, cval = self.value_decode(value) + self.__set(key, rval, cval) + M = self[key] + + +class SimpleCookie(BaseCookie): + """ + SimpleCookie supports strings as cookie values. When setting + the value using the dictionary assignment notation, SimpleCookie + calls the builtin str() to convert the value to a string. Values + received from HTTP are kept as strings. 
+ """ + def value_decode(self, val): + return _unquote(val), val + + def value_encode(self, val): + strval = str(val) + return strval, _quote(strval) diff --git a/future/standard_library/http/server.py b/src/future/backports/http/server.py similarity index 96% rename from future/standard_library/http/server.py rename to src/future/backports/http/server.py index b9b4d348..b1c11e0c 100644 --- a/future/standard_library/http/server.py +++ b/src/future/backports/http/server.py @@ -35,7 +35,7 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) -from future import standard_library +from future import utils from future.builtins import * @@ -91,10 +91,11 @@ __all__ = ["HTTPServer", "BaseHTTPRequestHandler"] -import html -import email.message -import email.parser -import http.client +from future.backports import html +from future.backports.http import client as http_client +from future.backports.urllib import parse as urllib_parse +from future.backports import socketserver + import io import mimetypes import os @@ -102,19 +103,11 @@ import select import shutil import socket # For gethostbyaddr() -import socketserver import sys import time import copy import argparse -# Not backported yet: -# import urllib.parse -# The old Py2 one instead: -import urllib -# Also from 2.7: -import cgi - # Default error message template DEFAULT_ERROR_MESSAGE = """\ @@ -332,9 +325,9 @@ def parse_request(self): # Examine the headers and look for a Connection directive. 
try: - self.headers = http.client.parse_headers(self.rfile, + self.headers = http_client.parse_headers(self.rfile, _class=self.MessageClass) - except http.client.LineTooLong: + except http_client.LineTooLong: self.send_error(400, "Line too long") return False @@ -584,7 +577,7 @@ def address_string(self): protocol_version = "HTTP/1.0" # MessageClass used to parse headers - MessageClass = http.client.HTTPMessage + MessageClass = http_client.HTTPMessage # Table mapping response codes to messages; entries have the # form {code: (shortmessage, longmessage)}. @@ -748,10 +741,7 @@ def list_directory(self, path): return None list.sort(key=lambda a: a.lower()) r = [] - # Urllib.parse not ported yet: - # displaypath = html.escape(urllib.parse.unquote(self.path)) - # Use this code from the Py2.7 httpservers.py module instead: - displaypath = cgi.escape(urllib.unquote(self.path)) + displaypath = html.escape(urllib_parse.unquote(self.path)) enc = sys.getfilesystemencoding() title = 'Directory listing for %s' % displaypath r.append('%s' - # % (urllib.parse.quote(linkname), html.escape(displayname))) - # Use this instead: r.append('
  • %s
  • ' - % (urllib.quote(linkname), cgi.escape(displayname))) + % (urllib_parse.quote(linkname), html.escape(displayname))) + # # Use this instead: + # r.append('
  • %s
  • ' + # % (urllib.quote(linkname), cgi.escape(displayname))) r.append('\n
    \n\n\n') encoded = '\n'.join(r).encode(enc) f = io.BytesIO() @@ -800,8 +789,7 @@ def translate_path(self, path): # abandon query parameters path = path.split('?',1)[0] path = path.split('#',1)[0] - # Was this: path = posixpath.normpath(urllib.parse.unquote(path)) - path = posixpath.normpath(urllib.unquote(path)) + path = posixpath.normpath(urllib_parse.unquote(path)) words = path.split('/') words = filter(None, words) path = os.getcwd() @@ -1059,8 +1047,7 @@ def run_cgi(self): env['SERVER_PROTOCOL'] = self.protocol_version env['SERVER_PORT'] = str(self.server.server_port) env['REQUEST_METHOD'] = self.command - # Was this: uqrest = urllib.parse.unquote(rest) - uqrest = urllib.unquote(rest) + uqrest = urllib_parse.unquote(rest) env['PATH_INFO'] = uqrest env['PATH_TRANSLATED'] = self.translate_path(uqrest) env['SCRIPT_NAME'] = scriptname @@ -1076,11 +1063,14 @@ def run_cgi(self): if authorization[0].lower() == "basic": try: authorization = authorization[1].encode('ascii') - # In Py3.3, was: authorization = base64.decodebytes(authorization).\ - # decode('ascii') - # Backport to Py2.7: - authorization = base64.decodestring(authorization).\ - decode('ascii') + if utils.PY3: + # In Py3.3, was: + authorization = base64.decodebytes(authorization).\ + decode('ascii') + else: + # Backport to Py2.7: + authorization = base64.decodestring(authorization).\ + decode('ascii') except (binascii.Error, UnicodeError): pass else: diff --git a/src/future/backports/misc.py b/src/future/backports/misc.py new file mode 100644 index 00000000..992b978f --- /dev/null +++ b/src/future/backports/misc.py @@ -0,0 +1,958 @@ +""" +Miscellaneous function (re)definitions from the Py3.4+ standard library +for Python 2.6/2.7. 
+ +- math.ceil (for Python 2.7) +- collections.OrderedDict (for Python 2.6) +- collections.Counter (for Python 2.6) +- collections.ChainMap (for all versions prior to Python 3.3) +- itertools.count (for Python 2.6, with step parameter) +- subprocess.check_output (for Python 2.6) +- reprlib.recursive_repr (for Python 2.6+) +- functools.cmp_to_key (for Python 2.6) +""" + +from __future__ import absolute_import + +import subprocess +from math import ceil as oldceil + +from operator import itemgetter as _itemgetter, eq as _eq +import sys +import heapq as _heapq +from _weakref import proxy as _proxy +from itertools import repeat as _repeat, chain as _chain, starmap as _starmap +from socket import getaddrinfo, SOCK_STREAM, error, socket + +from future.utils import iteritems, itervalues, PY2, PY26, PY3 + +if PY2: + from collections import Mapping, MutableMapping +else: + from collections.abc import Mapping, MutableMapping + + +def ceil(x): + """ + Return the ceiling of x as an int. + This is the smallest integral value >= x. 
+ """ + return int(oldceil(x)) + + +######################################################################## +### reprlib.recursive_repr decorator from Py3.4 +######################################################################## + +from itertools import islice + +if PY26: + # itertools.count in Py 2.6 doesn't accept a step parameter + def count(start=0, step=1): + while True: + yield start + start += step +else: + from itertools import count + + +if PY3: + try: + from _thread import get_ident + except ImportError: + from _dummy_thread import get_ident +else: + try: + from thread import get_ident + except ImportError: + from dummy_thread import get_ident + + +def recursive_repr(fillvalue='...'): + 'Decorator to make a repr function return fillvalue for a recursive call' + + def decorating_function(user_function): + repr_running = set() + + def wrapper(self): + key = id(self), get_ident() + if key in repr_running: + return fillvalue + repr_running.add(key) + try: + result = user_function(self) + finally: + repr_running.discard(key) + return result + + # Can't use functools.wraps() here because of bootstrap issues + wrapper.__module__ = getattr(user_function, '__module__') + wrapper.__doc__ = getattr(user_function, '__doc__') + wrapper.__name__ = getattr(user_function, '__name__') + wrapper.__annotations__ = getattr(user_function, '__annotations__', {}) + return wrapper + + return decorating_function + + +# OrderedDict Shim from Raymond Hettinger, python core dev +# http://code.activestate.com/recipes/576693-ordered-dictionary-for-py24/ +# here to support version 2.6. + +################################################################################ +### OrderedDict +################################################################################ + +class _Link(object): + __slots__ = 'prev', 'next', 'key', '__weakref__' + +class OrderedDict(dict): + 'Dictionary that remembers insertion order' + # An inherited dict maps keys to values. 
+ # The inherited dict provides __getitem__, __len__, __contains__, and get. + # The remaining methods are order-aware. + # Big-O running times for all methods are the same as regular dictionaries. + + # The internal self.__map dict maps keys to links in a doubly linked list. + # The circular doubly linked list starts and ends with a sentinel element. + # The sentinel element never gets deleted (this simplifies the algorithm). + # The sentinel is in self.__hardroot with a weakref proxy in self.__root. + # The prev links are weakref proxies (to prevent circular references). + # Individual links are kept alive by the hard reference in self.__map. + # Those hard references disappear when a key is deleted from an OrderedDict. + + def __init__(*args, **kwds): + '''Initialize an ordered dictionary. The signature is the same as + regular dictionaries, but keyword arguments are not recommended because + their insertion order is arbitrary. + + ''' + if not args: + raise TypeError("descriptor '__init__' of 'OrderedDict' object " + "needs an argument") + self = args[0] + args = args[1:] + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__root + except AttributeError: + self.__hardroot = _Link() + self.__root = root = _proxy(self.__hardroot) + root.prev = root.next = root + self.__map = {} + self.__update(*args, **kwds) + + def __setitem__(self, key, value, + dict_setitem=dict.__setitem__, proxy=_proxy, Link=_Link): + 'od.__setitem__(i, y) <==> od[i]=y' + # Setting a new item creates a new link at the end of the linked list, + # and the inherited dictionary is updated with the new key/value pair. 
+ if key not in self: + self.__map[key] = link = Link() + root = self.__root + last = root.prev + link.prev, link.next, link.key = last, root, key + last.next = link + root.prev = proxy(link) + dict_setitem(self, key, value) + + def __delitem__(self, key, dict_delitem=dict.__delitem__): + 'od.__delitem__(y) <==> del od[y]' + # Deleting an existing item uses self.__map to find the link which gets + # removed by updating the links in the predecessor and successor nodes. + dict_delitem(self, key) + link = self.__map.pop(key) + link_prev = link.prev + link_next = link.next + link_prev.next = link_next + link_next.prev = link_prev + + def __iter__(self): + 'od.__iter__() <==> iter(od)' + # Traverse the linked list in order. + root = self.__root + curr = root.next + while curr is not root: + yield curr.key + curr = curr.next + + def __reversed__(self): + 'od.__reversed__() <==> reversed(od)' + # Traverse the linked list in reverse order. + root = self.__root + curr = root.prev + while curr is not root: + yield curr.key + curr = curr.prev + + def clear(self): + 'od.clear() -> None. Remove all items from od.' + root = self.__root + root.prev = root.next = root + self.__map.clear() + dict.clear(self) + + def popitem(self, last=True): + '''od.popitem() -> (k, v), return and remove a (key, value) pair. + Pairs are returned in LIFO order if last is true or FIFO order if false. + + ''' + if not self: + raise KeyError('dictionary is empty') + root = self.__root + if last: + link = root.prev + link_prev = link.prev + link_prev.next = root + root.prev = link_prev + else: + link = root.next + link_next = link.next + root.next = link_next + link_next.prev = root + key = link.key + del self.__map[key] + value = dict.pop(self, key) + return key, value + + def move_to_end(self, key, last=True): + '''Move an existing element to the end (or beginning if last==False). + + Raises KeyError if the element does not exist. + When last=True, acts like a fast version of self[key]=self.pop(key). 
+ + ''' + link = self.__map[key] + link_prev = link.prev + link_next = link.next + link_prev.next = link_next + link_next.prev = link_prev + root = self.__root + if last: + last = root.prev + link.prev = last + link.next = root + last.next = root.prev = link + else: + first = root.next + link.prev = root + link.next = first + root.next = first.prev = link + + def __sizeof__(self): + sizeof = sys.getsizeof + n = len(self) + 1 # number of links including root + size = sizeof(self.__dict__) # instance dictionary + size += sizeof(self.__map) * 2 # internal dict and inherited dict + size += sizeof(self.__hardroot) * n # link objects + size += sizeof(self.__root) * n # proxy objects + return size + + update = __update = MutableMapping.update + keys = MutableMapping.keys + values = MutableMapping.values + items = MutableMapping.items + __ne__ = MutableMapping.__ne__ + + __marker = object() + + def pop(self, key, default=__marker): + '''od.pop(k[,d]) -> v, remove specified key and return the corresponding + value. If key is not found, d is returned if given, otherwise KeyError + is raised. 
+ + ''' + if key in self: + result = self[key] + del self[key] + return result + if default is self.__marker: + raise KeyError(key) + return default + + def setdefault(self, key, default=None): + 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' + if key in self: + return self[key] + self[key] = default + return default + + @recursive_repr() + def __repr__(self): + 'od.__repr__() <==> repr(od)' + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, list(self.items())) + + def __reduce__(self): + 'Return state information for pickling' + inst_dict = vars(self).copy() + for k in vars(OrderedDict()): + inst_dict.pop(k, None) + return self.__class__, (), inst_dict or None, None, iter(self.items()) + + def copy(self): + 'od.copy() -> a shallow copy of od' + return self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S. + If not specified, the value defaults to None. + + ''' + self = cls() + for key in iterable: + self[key] = value + return self + + def __eq__(self, other): + '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive + while comparison to a regular mapping is order-insensitive. + + ''' + if isinstance(other, OrderedDict): + return dict.__eq__(self, other) and all(map(_eq, self, other)) + return dict.__eq__(self, other) + + +# {{{ http://code.activestate.com/recipes/576611/ (r11) + +try: + from operator import itemgetter + from heapq import nlargest +except ImportError: + pass + +######################################################################## +### Counter +######################################################################## + +def _count_elements(mapping, iterable): + 'Tally elements from the iterable.' + mapping_get = mapping.get + for elem in iterable: + mapping[elem] = mapping_get(elem, 0) + 1 + +class Counter(dict): + '''Dict subclass for counting hashable items. 
Sometimes called a bag + or multiset. Elements are stored as dictionary keys and their counts + are stored as dictionary values. + + >>> c = Counter('abcdeabcdabcaba') # count elements from a string + + >>> c.most_common(3) # three most common elements + [('a', 5), ('b', 4), ('c', 3)] + >>> sorted(c) # list all unique elements + ['a', 'b', 'c', 'd', 'e'] + >>> ''.join(sorted(c.elements())) # list elements with repetitions + 'aaaaabbbbcccdde' + >>> sum(c.values()) # total of all counts + 15 + + >>> c['a'] # count of letter 'a' + 5 + >>> for elem in 'shazam': # update counts from an iterable + ... c[elem] += 1 # by adding 1 to each element's count + >>> c['a'] # now there are seven 'a' + 7 + >>> del c['b'] # remove all 'b' + >>> c['b'] # now there are zero 'b' + 0 + + >>> d = Counter('simsalabim') # make another counter + >>> c.update(d) # add in the second counter + >>> c['a'] # now there are nine 'a' + 9 + + >>> c.clear() # empty the counter + >>> c + Counter() + + Note: If a count is set to zero or reduced to zero, it will remain + in the counter until the entry is deleted or the counter is cleared: + + >>> c = Counter('aaabbc') + >>> c['b'] -= 2 # reduce the count of 'b' by two + >>> c.most_common() # 'b' is still in, but its count is zero + [('a', 3), ('c', 1), ('b', 0)] + + ''' + # References: + # http://en.wikipedia.org/wiki/Multiset + # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html + # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm + # http://code.activestate.com/recipes/259174/ + # Knuth, TAOCP Vol. II section 4.6.3 + + def __init__(*args, **kwds): + '''Create a new, empty Counter object. And if given, count elements + from an input iterable. Or, initialize the count from another mapping + of elements to their counts. 
+ + >>> c = Counter() # a new, empty counter + >>> c = Counter('gallahad') # a new counter from an iterable + >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping + >>> c = Counter(a=4, b=2) # a new counter from keyword args + + ''' + if not args: + raise TypeError("descriptor '__init__' of 'Counter' object " + "needs an argument") + self = args[0] + args = args[1:] + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + super(Counter, self).__init__() + self.update(*args, **kwds) + + def __missing__(self, key): + 'The count of elements not in the Counter is zero.' + # Needed so that self[missing_item] does not raise KeyError + return 0 + + def most_common(self, n=None): + '''List the n most common elements and their counts from the most + common to the least. If n is None, then list all element counts. + + >>> Counter('abcdeabcdabcaba').most_common(3) + [('a', 5), ('b', 4), ('c', 3)] + + ''' + # Emulate Bag.sortedByCount from Smalltalk + if n is None: + return sorted(self.items(), key=_itemgetter(1), reverse=True) + return _heapq.nlargest(n, self.items(), key=_itemgetter(1)) + + def elements(self): + '''Iterator over elements repeating each as many times as its count. + + >>> c = Counter('ABCABC') + >>> sorted(c.elements()) + ['A', 'A', 'B', 'B', 'C', 'C'] + + # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1 + >>> prime_factors = Counter({2: 2, 3: 3, 17: 1}) + >>> product = 1 + >>> for factor in prime_factors.elements(): # loop over factors + ... product *= factor # and multiply them + >>> product + 1836 + + Note, if an element's count has been set to zero or is a negative + number, elements() will ignore it. + + ''' + # Emulate Bag.do from Smalltalk and Multiset.begin from C++. 
+ return _chain.from_iterable(_starmap(_repeat, self.items())) + + # Override dict methods where necessary + + @classmethod + def fromkeys(cls, iterable, v=None): + # There is no equivalent method for counters because setting v=1 + # means that no element can have a count greater than one. + raise NotImplementedError( + 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.') + + def update(*args, **kwds): + '''Like dict.update() but add counts instead of replacing them. + + Source can be an iterable, a dictionary, or another Counter instance. + + >>> c = Counter('which') + >>> c.update('witch') # add elements from another iterable + >>> d = Counter('watch') + >>> c.update(d) # add elements from another counter + >>> c['h'] # four 'h' in which, witch, and watch + 4 + + ''' + # The regular dict.update() operation makes no sense here because the + # replace behavior results in the some of original untouched counts + # being mixed-in with all of the other counts for a mismash that + # doesn't have a straight-forward interpretation in most counting + # contexts. Instead, we implement straight-addition. Both the inputs + # and outputs are allowed to contain zero and negative counts. + + if not args: + raise TypeError("descriptor 'update' of 'Counter' object " + "needs an argument") + self = args[0] + args = args[1:] + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + iterable = args[0] if args else None + if iterable is not None: + if isinstance(iterable, Mapping): + if self: + self_get = self.get + for elem, count in iterable.items(): + self[elem] = count + self_get(elem, 0) + else: + super(Counter, self).update(iterable) # fast path when counter is empty + else: + _count_elements(self, iterable) + if kwds: + self.update(kwds) + + def subtract(*args, **kwds): + '''Like dict.update() but subtracts counts instead of replacing them. + Counts can be reduced below zero. 
Both the inputs and outputs are + allowed to contain zero and negative counts. + + Source can be an iterable, a dictionary, or another Counter instance. + + >>> c = Counter('which') + >>> c.subtract('witch') # subtract elements from another iterable + >>> c.subtract(Counter('watch')) # subtract elements from another counter + >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch + 0 + >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch + -1 + + ''' + if not args: + raise TypeError("descriptor 'subtract' of 'Counter' object " + "needs an argument") + self = args[0] + args = args[1:] + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + iterable = args[0] if args else None + if iterable is not None: + self_get = self.get + if isinstance(iterable, Mapping): + for elem, count in iterable.items(): + self[elem] = self_get(elem, 0) - count + else: + for elem in iterable: + self[elem] = self_get(elem, 0) - 1 + if kwds: + self.subtract(kwds) + + def copy(self): + 'Return a shallow copy.' + return self.__class__(self) + + def __reduce__(self): + return self.__class__, (dict(self),) + + def __delitem__(self, elem): + 'Like dict.__delitem__() but does not raise KeyError for missing values.' + if elem in self: + super(Counter, self).__delitem__(elem) + + def __repr__(self): + if not self: + return '%s()' % self.__class__.__name__ + try: + items = ', '.join(map('%r: %r'.__mod__, self.most_common())) + return '%s({%s})' % (self.__class__.__name__, items) + except TypeError: + # handle case where values are not orderable + return '{0}({1!r})'.format(self.__class__.__name__, dict(self)) + + # Multiset-style mathematical operations discussed in: + # Knuth TAOCP Volume II section 4.6.3 exercise 19 + # and at http://en.wikipedia.org/wiki/Multiset + # + # Outputs guaranteed to only include positive counts. 
+ # + # To strip negative and zero counts, add-in an empty counter: + # c += Counter() + + def __add__(self, other): + '''Add counts from two counters. + + >>> Counter('abbb') + Counter('bcc') + Counter({'b': 4, 'c': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem, count in self.items(): + newcount = count + other[elem] + if newcount > 0: + result[elem] = newcount + for elem, count in other.items(): + if elem not in self and count > 0: + result[elem] = count + return result + + def __sub__(self, other): + ''' Subtract count, but keep only results with positive counts. + + >>> Counter('abbbc') - Counter('bccd') + Counter({'b': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem, count in self.items(): + newcount = count - other[elem] + if newcount > 0: + result[elem] = newcount + for elem, count in other.items(): + if elem not in self and count < 0: + result[elem] = 0 - count + return result + + def __or__(self, other): + '''Union is the maximum of value in either of the input counters. + + >>> Counter('abbb') | Counter('bcc') + Counter({'b': 3, 'c': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem, count in self.items(): + other_count = other[elem] + newcount = other_count if count < other_count else count + if newcount > 0: + result[elem] = newcount + for elem, count in other.items(): + if elem not in self and count > 0: + result[elem] = count + return result + + def __and__(self, other): + ''' Intersection is the minimum of corresponding counts. 
+ + >>> Counter('abbb') & Counter('bcc') + Counter({'b': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem, count in self.items(): + other_count = other[elem] + newcount = count if count < other_count else other_count + if newcount > 0: + result[elem] = newcount + return result + + def __pos__(self): + 'Adds an empty counter, effectively stripping negative and zero counts' + return self + Counter() + + def __neg__(self): + '''Subtracts from an empty counter. Strips positive and zero counts, + and flips the sign on negative counts. + + ''' + return Counter() - self + + def _keep_positive(self): + '''Internal method to strip elements with a negative or zero count''' + nonpositive = [elem for elem, count in self.items() if not count > 0] + for elem in nonpositive: + del self[elem] + return self + + def __iadd__(self, other): + '''Inplace add from another counter, keeping only positive counts. + + >>> c = Counter('abbb') + >>> c += Counter('bcc') + >>> c + Counter({'b': 4, 'c': 2, 'a': 1}) + + ''' + for elem, count in other.items(): + self[elem] += count + return self._keep_positive() + + def __isub__(self, other): + '''Inplace subtract counter, but keep only results with positive counts. + + >>> c = Counter('abbbc') + >>> c -= Counter('bccd') + >>> c + Counter({'b': 2, 'a': 1}) + + ''' + for elem, count in other.items(): + self[elem] -= count + return self._keep_positive() + + def __ior__(self, other): + '''Inplace union is the maximum of value from either counter. + + >>> c = Counter('abbb') + >>> c |= Counter('bcc') + >>> c + Counter({'b': 3, 'c': 2, 'a': 1}) + + ''' + for elem, other_count in other.items(): + count = self[elem] + if other_count > count: + self[elem] = other_count + return self._keep_positive() + + def __iand__(self, other): + '''Inplace intersection is the minimum of corresponding counts. 
+ + >>> c = Counter('abbb') + >>> c &= Counter('bcc') + >>> c + Counter({'b': 1}) + + ''' + for elem, count in self.items(): + other_count = other[elem] + if other_count < count: + self[elem] = other_count + return self._keep_positive() + + +def check_output(*popenargs, **kwargs): + """ + For Python 2.6 compatibility: see + http://stackoverflow.com/questions/4814970/ + """ + + if 'stdout' in kwargs: + raise ValueError('stdout argument not allowed, it will be overridden.') + process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs) + output, unused_err = process.communicate() + retcode = process.poll() + if retcode: + cmd = kwargs.get("args") + if cmd is None: + cmd = popenargs[0] + raise subprocess.CalledProcessError(retcode, cmd) + return output + + +def count(start=0, step=1): + """ + ``itertools.count`` in Py 2.6 doesn't accept a step + parameter. This is an enhanced version of ``itertools.count`` + for Py2.6 equivalent to ``itertools.count`` in Python 2.7+. + """ + while True: + yield start + start += step + + +######################################################################## +### ChainMap (helper for configparser and string.Template) +### From the Py3.4 source code. See also: +### https://github.com/kkxue/Py2ChainMap/blob/master/py2chainmap.py +######################################################################## + +class ChainMap(MutableMapping): + ''' A ChainMap groups multiple dicts (or other mappings) together + to create a single, updateable view. + + The underlying mappings are stored in a list. That list is public and can + accessed or updated using the *maps* attribute. There is no other state. + + Lookups search the underlying mappings successively until a key is found. + In contrast, writes, updates, and deletions only operate on the first + mapping. + + ''' + + def __init__(self, *maps): + '''Initialize a ChainMap by setting *maps* to the given mappings. + If no mappings are provided, a single empty dictionary is used. 
+ + ''' + self.maps = list(maps) or [{}] # always at least one map + + def __missing__(self, key): + raise KeyError(key) + + def __getitem__(self, key): + for mapping in self.maps: + try: + return mapping[key] # can't use 'key in mapping' with defaultdict + except KeyError: + pass + return self.__missing__(key) # support subclasses that define __missing__ + + def get(self, key, default=None): + return self[key] if key in self else default + + def __len__(self): + return len(set().union(*self.maps)) # reuses stored hash values if possible + + def __iter__(self): + return iter(set().union(*self.maps)) + + def __contains__(self, key): + return any(key in m for m in self.maps) + + def __bool__(self): + return any(self.maps) + + # Py2 compatibility: + __nonzero__ = __bool__ + + @recursive_repr() + def __repr__(self): + return '{0.__class__.__name__}({1})'.format( + self, ', '.join(map(repr, self.maps))) + + @classmethod + def fromkeys(cls, iterable, *args): + 'Create a ChainMap with a single dict created from the iterable.' + return cls(dict.fromkeys(iterable, *args)) + + def copy(self): + 'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]' + return self.__class__(self.maps[0].copy(), *self.maps[1:]) + + __copy__ = copy + + def new_child(self, m=None): # like Django's Context.push() + ''' + New ChainMap with a new map followed by all previous maps. If no + map is provided, an empty dict is used. + ''' + if m is None: + m = {} + return self.__class__(m, *self.maps) + + @property + def parents(self): # like Django's Context.pop() + 'New ChainMap from maps[1:].' + return self.__class__(*self.maps[1:]) + + def __setitem__(self, key, value): + self.maps[0][key] = value + + def __delitem__(self, key): + try: + del self.maps[0][key] + except KeyError: + raise KeyError('Key not found in the first mapping: {0!r}'.format(key)) + + def popitem(self): + 'Remove and return an item pair from maps[0]. Raise KeyError is maps[0] is empty.' 
+ try: + return self.maps[0].popitem() + except KeyError: + raise KeyError('No keys found in the first mapping.') + + def pop(self, key, *args): + 'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].' + try: + return self.maps[0].pop(key, *args) + except KeyError: + raise KeyError('Key not found in the first mapping: {0!r}'.format(key)) + + def clear(self): + 'Clear maps[0], leaving maps[1:] intact.' + self.maps[0].clear() + + +# Re-use the same sentinel as in the Python stdlib socket module: +from socket import _GLOBAL_DEFAULT_TIMEOUT +# Was: _GLOBAL_DEFAULT_TIMEOUT = object() + + +def create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT, + source_address=None): + """Backport of 3-argument create_connection() for Py2.6. + + Connect to *address* and return the socket object. + + Convenience function. Connect to *address* (a 2-tuple ``(host, + port)``) and return the socket object. Passing the optional + *timeout* parameter will set the timeout on the socket instance + before attempting to connect. If no *timeout* is supplied, the + global default timeout setting returned by :func:`getdefaulttimeout` + is used. If *source_address* is set it must be a tuple of (host, port) + for the socket to bind as a source address before making the connection. + An host of '' or port 0 tells the OS to use the default. 
+ """ + + host, port = address + err = None + for res in getaddrinfo(host, port, 0, SOCK_STREAM): + af, socktype, proto, canonname, sa = res + sock = None + try: + sock = socket(af, socktype, proto) + if timeout is not _GLOBAL_DEFAULT_TIMEOUT: + sock.settimeout(timeout) + if source_address: + sock.bind(source_address) + sock.connect(sa) + return sock + + except error as _: + err = _ + if sock is not None: + sock.close() + + if err is not None: + raise err + else: + raise error("getaddrinfo returns an empty list") + +# Backport from Py2.7 for Py2.6: +def cmp_to_key(mycmp): + """Convert a cmp= function into a key= function""" + class K(object): + __slots__ = ['obj'] + def __init__(self, obj, *args): + self.obj = obj + def __lt__(self, other): + return mycmp(self.obj, other.obj) < 0 + def __gt__(self, other): + return mycmp(self.obj, other.obj) > 0 + def __eq__(self, other): + return mycmp(self.obj, other.obj) == 0 + def __le__(self, other): + return mycmp(self.obj, other.obj) <= 0 + def __ge__(self, other): + return mycmp(self.obj, other.obj) >= 0 + def __ne__(self, other): + return mycmp(self.obj, other.obj) != 0 + def __hash__(self): + raise TypeError('hash not implemented') + return K + +# Back up our definitions above in case they're useful +_OrderedDict = OrderedDict +_Counter = Counter +_check_output = check_output +_count = count +_ceil = ceil +__count_elements = _count_elements +_recursive_repr = recursive_repr +_ChainMap = ChainMap +_create_connection = create_connection +_cmp_to_key = cmp_to_key + +# Overwrite the definitions above with the usual ones +# from the standard library: +if sys.version_info >= (2, 7): + from collections import OrderedDict, Counter + from itertools import count + from functools import cmp_to_key + try: + from subprocess import check_output + except ImportError: + # Not available. 
This happens with Google App Engine: see issue #231 + pass + from socket import create_connection + +if sys.version_info >= (3, 0): + from math import ceil + from collections import _count_elements + +if sys.version_info >= (3, 3): + from reprlib import recursive_repr + from collections import ChainMap diff --git a/src/future/backports/socket.py b/src/future/backports/socket.py new file mode 100644 index 00000000..930e1dae --- /dev/null +++ b/src/future/backports/socket.py @@ -0,0 +1,454 @@ +# Wrapper module for _socket, providing some additional facilities +# implemented in Python. + +"""\ +This module provides socket operations and some related functions. +On Unix, it supports IP (Internet Protocol) and Unix domain sockets. +On other systems, it only supports IP. Functions specific for a +socket are available as methods of the socket object. + +Functions: + +socket() -- create a new socket object +socketpair() -- create a pair of new socket objects [*] +fromfd() -- create a socket object from an open file descriptor [*] +fromshare() -- create a socket object from data received from socket.share() [*] +gethostname() -- return the current hostname +gethostbyname() -- map a hostname to its IP number +gethostbyaddr() -- map an IP number or hostname to DNS info +getservbyname() -- map a service name and a protocol name to a port number +getprotobyname() -- map a protocol name (e.g. 'tcp') to a number +ntohs(), ntohl() -- convert 16, 32 bit int from network to host byte order +htons(), htonl() -- convert 16, 32 bit int from host to network byte order +inet_aton() -- convert IP addr string (123.45.67.89) to 32-bit packed format +inet_ntoa() -- convert 32-bit packed format IP to string (123.45.67.89) +socket.getdefaulttimeout() -- get the default timeout value +socket.setdefaulttimeout() -- set the default timeout value +create_connection() -- connects to an address, with an optional timeout and + optional source address. + + [*] not available on all platforms! 
+ +Special objects: + +SocketType -- type object for socket objects +error -- exception raised for I/O errors +has_ipv6 -- boolean value indicating if IPv6 is supported + +Integer constants: + +AF_INET, AF_UNIX -- socket domains (first argument to socket() call) +SOCK_STREAM, SOCK_DGRAM, SOCK_RAW -- socket types (second argument) + +Many other constants may be defined; these may be used in calls to +the setsockopt() and getsockopt() methods. +""" + +from __future__ import unicode_literals +from __future__ import print_function +from __future__ import division +from __future__ import absolute_import +from future.builtins import super + +import _socket +from _socket import * + +import os, sys, io + +try: + import errno +except ImportError: + errno = None +EBADF = getattr(errno, 'EBADF', 9) +EAGAIN = getattr(errno, 'EAGAIN', 11) +EWOULDBLOCK = getattr(errno, 'EWOULDBLOCK', 11) + +__all__ = ["getfqdn", "create_connection"] +__all__.extend(os._get_exports_list(_socket)) + + +_realsocket = socket + +# WSA error codes +if sys.platform.lower().startswith("win"): + errorTab = {} + errorTab[10004] = "The operation was interrupted." + errorTab[10009] = "A bad file handle was passed." + errorTab[10013] = "Permission denied." + errorTab[10014] = "A fault occurred on the network??" # WSAEFAULT + errorTab[10022] = "An invalid operation was attempted." + errorTab[10035] = "The socket operation would block" + errorTab[10036] = "A blocking operation is already in progress." + errorTab[10048] = "The network address is in use." + errorTab[10054] = "The connection has been reset." + errorTab[10058] = "The network has been shut down." + errorTab[10060] = "The operation timed out." + errorTab[10061] = "Connection refused." + errorTab[10063] = "The name is too long." + errorTab[10064] = "The host is down." + errorTab[10065] = "The host is unreachable." 
+ __all__.append("errorTab") + + +class socket(_socket.socket): + + """A subclass of _socket.socket adding the makefile() method.""" + + __slots__ = ["__weakref__", "_io_refs", "_closed"] + + def __init__(self, family=AF_INET, type=SOCK_STREAM, proto=0, fileno=None): + if fileno is None: + _socket.socket.__init__(self, family, type, proto) + else: + _socket.socket.__init__(self, family, type, proto, fileno) + self._io_refs = 0 + self._closed = False + + def __enter__(self): + return self + + def __exit__(self, *args): + if not self._closed: + self.close() + + def __repr__(self): + """Wrap __repr__() to reveal the real class name.""" + s = _socket.socket.__repr__(self) + if s.startswith(" socket object + + Return a new socket object connected to the same system resource. + """ + fd = dup(self.fileno()) + sock = self.__class__(self.family, self.type, self.proto, fileno=fd) + sock.settimeout(self.gettimeout()) + return sock + + def accept(self): + """accept() -> (socket object, address info) + + Wait for an incoming connection. Return a new socket + representing the connection, and the address of the client. + For IP sockets, the address info is a pair (hostaddr, port). + """ + fd, addr = self._accept() + sock = socket(self.family, self.type, self.proto, fileno=fd) + # Issue #7995: if no default timeout is set and the listening + # socket had a (non-zero) timeout, force the new socket in blocking + # mode to override platform-specific socket flags inheritance. + if getdefaulttimeout() is None and self.gettimeout(): + sock.setblocking(True) + return sock, addr + + def makefile(self, mode="r", buffering=None, **_3to2kwargs): + """makefile(...) -> an I/O stream connected to the socket + + The arguments are as for io.open() after the filename, + except the only mode characters supported are 'r', 'w' and 'b'. + The semantics are similar too. (XXX refactor to share code?) 
+ """ + if 'newline' in _3to2kwargs: newline = _3to2kwargs['newline']; del _3to2kwargs['newline'] + else: newline = None + if 'errors' in _3to2kwargs: errors = _3to2kwargs['errors']; del _3to2kwargs['errors'] + else: errors = None + if 'encoding' in _3to2kwargs: encoding = _3to2kwargs['encoding']; del _3to2kwargs['encoding'] + else: encoding = None + for c in mode: + if c not in ("r", "w", "b"): + raise ValueError("invalid mode %r (only r, w, b allowed)") + writing = "w" in mode + reading = "r" in mode or not writing + assert reading or writing + binary = "b" in mode + rawmode = "" + if reading: + rawmode += "r" + if writing: + rawmode += "w" + raw = SocketIO(self, rawmode) + self._io_refs += 1 + if buffering is None: + buffering = -1 + if buffering < 0: + buffering = io.DEFAULT_BUFFER_SIZE + if buffering == 0: + if not binary: + raise ValueError("unbuffered streams must be binary") + return raw + if reading and writing: + buffer = io.BufferedRWPair(raw, raw, buffering) + elif reading: + buffer = io.BufferedReader(raw, buffering) + else: + assert writing + buffer = io.BufferedWriter(raw, buffering) + if binary: + return buffer + text = io.TextIOWrapper(buffer, encoding, errors, newline) + text.mode = mode + return text + + def _decref_socketios(self): + if self._io_refs > 0: + self._io_refs -= 1 + if self._closed: + self.close() + + def _real_close(self, _ss=_socket.socket): + # This function should not reference any globals. See issue #808164. + _ss.close(self) + + def close(self): + # This function should not reference any globals. See issue #808164. + self._closed = True + if self._io_refs <= 0: + self._real_close() + + def detach(self): + """detach() -> file descriptor + + Close the socket object without closing the underlying file descriptor. + The object cannot be used after this call, but the file descriptor + can be reused for other purposes. The file descriptor is returned. 
+ """ + self._closed = True + return super().detach() + +def fromfd(fd, family, type, proto=0): + """ fromfd(fd, family, type[, proto]) -> socket object + + Create a socket object from a duplicate of the given file + descriptor. The remaining arguments are the same as for socket(). + """ + nfd = dup(fd) + return socket(family, type, proto, nfd) + +if hasattr(_socket.socket, "share"): + def fromshare(info): + """ fromshare(info) -> socket object + + Create a socket object from a the bytes object returned by + socket.share(pid). + """ + return socket(0, 0, 0, info) + +if hasattr(_socket, "socketpair"): + + def socketpair(family=None, type=SOCK_STREAM, proto=0): + """socketpair([family[, type[, proto]]]) -> (socket object, socket object) + + Create a pair of socket objects from the sockets returned by the platform + socketpair() function. + The arguments are the same as for socket() except the default family is + AF_UNIX if defined on the platform; otherwise, the default is AF_INET. + """ + if family is None: + try: + family = AF_UNIX + except NameError: + family = AF_INET + a, b = _socket.socketpair(family, type, proto) + a = socket(family, type, proto, a.detach()) + b = socket(family, type, proto, b.detach()) + return a, b + + +_blocking_errnos = set([EAGAIN, EWOULDBLOCK]) + +class SocketIO(io.RawIOBase): + + """Raw I/O implementation for stream sockets. + + This class supports the makefile() method on sockets. It provides + the raw I/O interface on top of a socket object. + """ + + # One might wonder why not let FileIO do the job instead. 
There are two + # main reasons why FileIO is not adapted: + # - it wouldn't work under Windows (where you can't used read() and + # write() on a socket handle) + # - it wouldn't work with socket timeouts (FileIO would ignore the + # timeout and consider the socket non-blocking) + + # XXX More docs + + def __init__(self, sock, mode): + if mode not in ("r", "w", "rw", "rb", "wb", "rwb"): + raise ValueError("invalid mode: %r" % mode) + io.RawIOBase.__init__(self) + self._sock = sock + if "b" not in mode: + mode += "b" + self._mode = mode + self._reading = "r" in mode + self._writing = "w" in mode + self._timeout_occurred = False + + def readinto(self, b): + """Read up to len(b) bytes into the writable buffer *b* and return + the number of bytes read. If the socket is non-blocking and no bytes + are available, None is returned. + + If *b* is non-empty, a 0 return value indicates that the connection + was shutdown at the other end. + """ + self._checkClosed() + self._checkReadable() + if self._timeout_occurred: + raise IOError("cannot read from timed out object") + while True: + try: + return self._sock.recv_into(b) + except timeout: + self._timeout_occurred = True + raise + # except InterruptedError: + # continue + except error as e: + if e.args[0] in _blocking_errnos: + return None + raise + + def write(self, b): + """Write the given bytes or bytearray object *b* to the socket + and return the number of bytes written. This can be less than + len(b) if not all data could be written. If the socket is + non-blocking and no bytes could be written None is returned. + """ + self._checkClosed() + self._checkWritable() + try: + return self._sock.send(b) + except error as e: + # XXX what about EINTR? + if e.args[0] in _blocking_errnos: + return None + raise + + def readable(self): + """True if the SocketIO is open for reading. 
+ """ + if self.closed: + raise ValueError("I/O operation on closed socket.") + return self._reading + + def writable(self): + """True if the SocketIO is open for writing. + """ + if self.closed: + raise ValueError("I/O operation on closed socket.") + return self._writing + + def seekable(self): + """True if the SocketIO is open for seeking. + """ + if self.closed: + raise ValueError("I/O operation on closed socket.") + return super().seekable() + + def fileno(self): + """Return the file descriptor of the underlying socket. + """ + self._checkClosed() + return self._sock.fileno() + + @property + def name(self): + if not self.closed: + return self.fileno() + else: + return -1 + + @property + def mode(self): + return self._mode + + def close(self): + """Close the SocketIO object. This doesn't close the underlying + socket, except if all references to it have disappeared. + """ + if self.closed: + return + io.RawIOBase.close(self) + self._sock._decref_socketios() + self._sock = None + + +def getfqdn(name=''): + """Get fully qualified domain name from name. + + An empty argument is interpreted as meaning the local host. + + First the hostname returned by gethostbyaddr() is checked, then + possibly existing aliases. In case no FQDN is available, hostname + from gethostname() is returned. + """ + name = name.strip() + if not name or name == '0.0.0.0': + name = gethostname() + try: + hostname, aliases, ipaddrs = gethostbyaddr(name) + except error: + pass + else: + aliases.insert(0, hostname) + for name in aliases: + if '.' in name: + break + else: + name = hostname + return name + + +# Re-use the same sentinel as in the Python stdlib socket module: +from socket import _GLOBAL_DEFAULT_TIMEOUT +# Was: _GLOBAL_DEFAULT_TIMEOUT = object() + + +def create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT, + source_address=None): + """Connect to *address* and return the socket object. + + Convenience function. 
Connect to *address* (a 2-tuple ``(host, + port)``) and return the socket object. Passing the optional + *timeout* parameter will set the timeout on the socket instance + before attempting to connect. If no *timeout* is supplied, the + global default timeout setting returned by :func:`getdefaulttimeout` + is used. If *source_address* is set it must be a tuple of (host, port) + for the socket to bind as a source address before making the connection. + An host of '' or port 0 tells the OS to use the default. + """ + + host, port = address + err = None + for res in getaddrinfo(host, port, 0, SOCK_STREAM): + af, socktype, proto, canonname, sa = res + sock = None + try: + sock = socket(af, socktype, proto) + if timeout is not _GLOBAL_DEFAULT_TIMEOUT: + sock.settimeout(timeout) + if source_address: + sock.bind(source_address) + sock.connect(sa) + return sock + + except error as _: + err = _ + if sock is not None: + sock.close() + + if err is not None: + raise err + else: + raise error("getaddrinfo returns an empty list") diff --git a/future/standard_library/socketserver.py b/src/future/backports/socketserver.py similarity index 100% rename from future/standard_library/socketserver.py rename to src/future/backports/socketserver.py diff --git a/future/standard_library/test/__init__.py b/src/future/backports/test/__init__.py similarity index 100% rename from future/standard_library/test/__init__.py rename to src/future/backports/test/__init__.py diff --git a/src/future/backports/test/badcert.pem b/src/future/backports/test/badcert.pem new file mode 100644 index 00000000..c4191460 --- /dev/null +++ b/src/future/backports/test/badcert.pem @@ -0,0 +1,36 @@ +-----BEGIN RSA PRIVATE KEY----- +MIICXwIBAAKBgQC8ddrhm+LutBvjYcQlnH21PPIseJ1JVG2HMmN2CmZk2YukO+9L +opdJhTvbGfEj0DQs1IE8M+kTUyOmuKfVrFMKwtVeCJphrAnhoz7TYOuLBSqt7lVH +fhi/VwovESJlaBOp+WMnfhcduPEYHYx/6cnVapIkZnLt30zu2um+DzA9jQIDAQAB +AoGBAK0FZpaKj6WnJZN0RqhhK+ggtBWwBnc0U/ozgKz2j1s3fsShYeiGtW6CK5nU 
+D1dZ5wzhbGThI7LiOXDvRucc9n7vUgi0alqPQ/PFodPxAN/eEYkmXQ7W2k7zwsDA +IUK0KUhktQbLu8qF/m8qM86ba9y9/9YkXuQbZ3COl5ahTZrhAkEA301P08RKv3KM +oXnGU2UHTuJ1MAD2hOrPxjD4/wxA/39EWG9bZczbJyggB4RHu0I3NOSFjAm3HQm0 +ANOu5QK9owJBANgOeLfNNcF4pp+UikRFqxk5hULqRAWzVxVrWe85FlPm0VVmHbb/ +loif7mqjU8o1jTd/LM7RD9f2usZyE2psaw8CQQCNLhkpX3KO5kKJmS9N7JMZSc4j +oog58yeYO8BBqKKzpug0LXuQultYv2K4veaIO04iL9VLe5z9S/Q1jaCHBBuXAkEA +z8gjGoi1AOp6PBBLZNsncCvcV/0aC+1se4HxTNo2+duKSDnbq+ljqOM+E7odU+Nq +ewvIWOG//e8fssd0mq3HywJBAJ8l/c8GVmrpFTx8r/nZ2Pyyjt3dH1widooDXYSV +q6Gbf41Llo5sYAtmxdndTLASuHKecacTgZVhy0FryZpLKrU= +-----END RSA PRIVATE KEY----- +-----BEGIN CERTIFICATE----- +Just bad cert data +-----END CERTIFICATE----- +-----BEGIN RSA PRIVATE KEY----- +MIICXwIBAAKBgQC8ddrhm+LutBvjYcQlnH21PPIseJ1JVG2HMmN2CmZk2YukO+9L +opdJhTvbGfEj0DQs1IE8M+kTUyOmuKfVrFMKwtVeCJphrAnhoz7TYOuLBSqt7lVH +fhi/VwovESJlaBOp+WMnfhcduPEYHYx/6cnVapIkZnLt30zu2um+DzA9jQIDAQAB +AoGBAK0FZpaKj6WnJZN0RqhhK+ggtBWwBnc0U/ozgKz2j1s3fsShYeiGtW6CK5nU +D1dZ5wzhbGThI7LiOXDvRucc9n7vUgi0alqPQ/PFodPxAN/eEYkmXQ7W2k7zwsDA +IUK0KUhktQbLu8qF/m8qM86ba9y9/9YkXuQbZ3COl5ahTZrhAkEA301P08RKv3KM +oXnGU2UHTuJ1MAD2hOrPxjD4/wxA/39EWG9bZczbJyggB4RHu0I3NOSFjAm3HQm0 +ANOu5QK9owJBANgOeLfNNcF4pp+UikRFqxk5hULqRAWzVxVrWe85FlPm0VVmHbb/ +loif7mqjU8o1jTd/LM7RD9f2usZyE2psaw8CQQCNLhkpX3KO5kKJmS9N7JMZSc4j +oog58yeYO8BBqKKzpug0LXuQultYv2K4veaIO04iL9VLe5z9S/Q1jaCHBBuXAkEA +z8gjGoi1AOp6PBBLZNsncCvcV/0aC+1se4HxTNo2+duKSDnbq+ljqOM+E7odU+Nq +ewvIWOG//e8fssd0mq3HywJBAJ8l/c8GVmrpFTx8r/nZ2Pyyjt3dH1widooDXYSV +q6Gbf41Llo5sYAtmxdndTLASuHKecacTgZVhy0FryZpLKrU= +-----END RSA PRIVATE KEY----- +-----BEGIN CERTIFICATE----- +Just bad cert data +-----END CERTIFICATE----- diff --git a/src/future/backports/test/badkey.pem b/src/future/backports/test/badkey.pem new file mode 100644 index 00000000..1c8a9557 --- /dev/null +++ b/src/future/backports/test/badkey.pem @@ -0,0 +1,40 @@ +-----BEGIN RSA PRIVATE KEY----- +Bad Key, though the cert should be OK +-----END RSA PRIVATE KEY----- +-----BEGIN 
CERTIFICATE----- +MIICpzCCAhCgAwIBAgIJAP+qStv1cIGNMA0GCSqGSIb3DQEBBQUAMIGJMQswCQYD +VQQGEwJVUzERMA8GA1UECBMIRGVsYXdhcmUxEzARBgNVBAcTCldpbG1pbmd0b24x +IzAhBgNVBAoTGlB5dGhvbiBTb2Z0d2FyZSBGb3VuZGF0aW9uMQwwCgYDVQQLEwNT +U0wxHzAdBgNVBAMTFnNvbWVtYWNoaW5lLnB5dGhvbi5vcmcwHhcNMDcwODI3MTY1 +NDUwWhcNMTMwMjE2MTY1NDUwWjCBiTELMAkGA1UEBhMCVVMxETAPBgNVBAgTCERl +bGF3YXJlMRMwEQYDVQQHEwpXaWxtaW5ndG9uMSMwIQYDVQQKExpQeXRob24gU29m +dHdhcmUgRm91bmRhdGlvbjEMMAoGA1UECxMDU1NMMR8wHQYDVQQDExZzb21lbWFj +aGluZS5weXRob24ub3JnMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC8ddrh +m+LutBvjYcQlnH21PPIseJ1JVG2HMmN2CmZk2YukO+9LopdJhTvbGfEj0DQs1IE8 +M+kTUyOmuKfVrFMKwtVeCJphrAnhoz7TYOuLBSqt7lVHfhi/VwovESJlaBOp+WMn +fhcduPEYHYx/6cnVapIkZnLt30zu2um+DzA9jQIDAQABoxUwEzARBglghkgBhvhC +AQEEBAMCBkAwDQYJKoZIhvcNAQEFBQADgYEAF4Q5BVqmCOLv1n8je/Jw9K669VXb +08hyGzQhkemEBYQd6fzQ9A/1ZzHkJKb1P6yreOLSEh4KcxYPyrLRC1ll8nr5OlCx +CMhKkTnR6qBsdNV0XtdU2+N25hqW+Ma4ZeqsN/iiJVCGNOZGnvQuvCAGWF8+J/f/ +iHkC6gGdBJhogs4= +-----END CERTIFICATE----- +-----BEGIN RSA PRIVATE KEY----- +Bad Key, though the cert should be OK +-----END RSA PRIVATE KEY----- +-----BEGIN CERTIFICATE----- +MIICpzCCAhCgAwIBAgIJAP+qStv1cIGNMA0GCSqGSIb3DQEBBQUAMIGJMQswCQYD +VQQGEwJVUzERMA8GA1UECBMIRGVsYXdhcmUxEzARBgNVBAcTCldpbG1pbmd0b24x +IzAhBgNVBAoTGlB5dGhvbiBTb2Z0d2FyZSBGb3VuZGF0aW9uMQwwCgYDVQQLEwNT +U0wxHzAdBgNVBAMTFnNvbWVtYWNoaW5lLnB5dGhvbi5vcmcwHhcNMDcwODI3MTY1 +NDUwWhcNMTMwMjE2MTY1NDUwWjCBiTELMAkGA1UEBhMCVVMxETAPBgNVBAgTCERl +bGF3YXJlMRMwEQYDVQQHEwpXaWxtaW5ndG9uMSMwIQYDVQQKExpQeXRob24gU29m +dHdhcmUgRm91bmRhdGlvbjEMMAoGA1UECxMDU1NMMR8wHQYDVQQDExZzb21lbWFj +aGluZS5weXRob24ub3JnMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC8ddrh +m+LutBvjYcQlnH21PPIseJ1JVG2HMmN2CmZk2YukO+9LopdJhTvbGfEj0DQs1IE8 +M+kTUyOmuKfVrFMKwtVeCJphrAnhoz7TYOuLBSqt7lVHfhi/VwovESJlaBOp+WMn +fhcduPEYHYx/6cnVapIkZnLt30zu2um+DzA9jQIDAQABoxUwEzARBglghkgBhvhC +AQEEBAMCBkAwDQYJKoZIhvcNAQEFBQADgYEAF4Q5BVqmCOLv1n8je/Jw9K669VXb +08hyGzQhkemEBYQd6fzQ9A/1ZzHkJKb1P6yreOLSEh4KcxYPyrLRC1ll8nr5OlCx 
+CMhKkTnR6qBsdNV0XtdU2+N25hqW+Ma4ZeqsN/iiJVCGNOZGnvQuvCAGWF8+J/f/ +iHkC6gGdBJhogs4= +-----END CERTIFICATE----- diff --git a/src/future/backports/test/dh512.pem b/src/future/backports/test/dh512.pem new file mode 100644 index 00000000..200d16cd --- /dev/null +++ b/src/future/backports/test/dh512.pem @@ -0,0 +1,9 @@ +-----BEGIN DH PARAMETERS----- +MEYCQQD1Kv884bEpQBgRjXyEpwpy1obEAxnIByl6ypUM2Zafq9AKUJsCRtMIPWak +XUGfnHy9iUsiGSa6q6Jew1XpKgVfAgEC +-----END DH PARAMETERS----- + +These are the 512 bit DH parameters from "Assigned Number for SKIP Protocols" +(http://www.skip-vpn.org/spec/numbers.html). +See there for how they were generated. +Note that g is not a generator, but this is not a problem since p is a safe prime. diff --git a/src/future/backports/test/https_svn_python_org_root.pem b/src/future/backports/test/https_svn_python_org_root.pem new file mode 100644 index 00000000..e7dfc829 --- /dev/null +++ b/src/future/backports/test/https_svn_python_org_root.pem @@ -0,0 +1,41 @@ +-----BEGIN CERTIFICATE----- +MIIHPTCCBSWgAwIBAgIBADANBgkqhkiG9w0BAQQFADB5MRAwDgYDVQQKEwdSb290 +IENBMR4wHAYDVQQLExVodHRwOi8vd3d3LmNhY2VydC5vcmcxIjAgBgNVBAMTGUNB +IENlcnQgU2lnbmluZyBBdXRob3JpdHkxITAfBgkqhkiG9w0BCQEWEnN1cHBvcnRA +Y2FjZXJ0Lm9yZzAeFw0wMzAzMzAxMjI5NDlaFw0zMzAzMjkxMjI5NDlaMHkxEDAO +BgNVBAoTB1Jvb3QgQ0ExHjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEi +MCAGA1UEAxMZQ0EgQ2VydCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJ +ARYSc3VwcG9ydEBjYWNlcnQub3JnMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIIC +CgKCAgEAziLA4kZ97DYoB1CW8qAzQIxL8TtmPzHlawI229Z89vGIj053NgVBlfkJ +8BLPRoZzYLdufujAWGSuzbCtRRcMY/pnCujW0r8+55jE8Ez64AO7NV1sId6eINm6 +zWYyN3L69wj1x81YyY7nDl7qPv4coRQKFWyGhFtkZip6qUtTefWIonvuLwphK42y +fk1WpRPs6tqSnqxEQR5YYGUFZvjARL3LlPdCfgv3ZWiYUQXw8wWRBB0bF4LsyFe7 +w2t6iPGwcswlWyCR7BYCEo8y6RcYSNDHBS4CMEK4JZwFaz+qOqfrU0j36NK2B5jc +G8Y0f3/JHIJ6BVgrCFvzOKKrF11myZjXnhCLotLddJr3cQxyYN/Nb5gznZY0dj4k +epKwDpUeb+agRThHqtdB7Uq3EvbXG4OKDy7YCbZZ16oE/9KTfWgu3YtLq1i6L43q 
+laegw1SJpfvbi1EinbLDvhG+LJGGi5Z4rSDTii8aP8bQUWWHIbEZAWV/RRyH9XzQ +QUxPKZgh/TMfdQwEUfoZd9vUFBzugcMd9Zi3aQaRIt0AUMyBMawSB3s42mhb5ivU +fslfrejrckzzAeVLIL+aplfKkQABi6F1ITe1Yw1nPkZPcCBnzsXWWdsC4PDSy826 +YreQQejdIOQpvGQpQsgi3Hia/0PsmBsJUUtaWsJx8cTLc6nloQsCAwEAAaOCAc4w +ggHKMB0GA1UdDgQWBBQWtTIb1Mfz4OaO873SsDrusjkY0TCBowYDVR0jBIGbMIGY +gBQWtTIb1Mfz4OaO873SsDrusjkY0aF9pHsweTEQMA4GA1UEChMHUm9vdCBDQTEe +MBwGA1UECxMVaHR0cDovL3d3dy5jYWNlcnQub3JnMSIwIAYDVQQDExlDQSBDZXJ0 +IFNpZ25pbmcgQXV0aG9yaXR5MSEwHwYJKoZIhvcNAQkBFhJzdXBwb3J0QGNhY2Vy +dC5vcmeCAQAwDwYDVR0TAQH/BAUwAwEB/zAyBgNVHR8EKzApMCegJaAjhiFodHRw +czovL3d3dy5jYWNlcnQub3JnL3Jldm9rZS5jcmwwMAYJYIZIAYb4QgEEBCMWIWh0 +dHBzOi8vd3d3LmNhY2VydC5vcmcvcmV2b2tlLmNybDA0BglghkgBhvhCAQgEJxYl +aHR0cDovL3d3dy5jYWNlcnQub3JnL2luZGV4LnBocD9pZD0xMDBWBglghkgBhvhC +AQ0ESRZHVG8gZ2V0IHlvdXIgb3duIGNlcnRpZmljYXRlIGZvciBGUkVFIGhlYWQg +b3ZlciB0byBodHRwOi8vd3d3LmNhY2VydC5vcmcwDQYJKoZIhvcNAQEEBQADggIB +ACjH7pyCArpcgBLKNQodgW+JapnM8mgPf6fhjViVPr3yBsOQWqy1YPaZQwGjiHCc +nWKdpIevZ1gNMDY75q1I08t0AoZxPuIrA2jxNGJARjtT6ij0rPtmlVOKTV39O9lg +18p5aTuxZZKmxoGCXJzN600BiqXfEVWqFcofN8CCmHBh22p8lqOOLlQ+TyGpkO/c +gr/c6EWtTZBzCDyUZbAEmXZ/4rzCahWqlwQ3JNgelE5tDlG+1sSPypZt90Pf6DBl +Jzt7u0NDY8RD97LsaMzhGY4i+5jhe1o+ATc7iwiwovOVThrLm82asduycPAtStvY +sONvRUgzEv/+PDIqVPfE94rwiCPCR/5kenHA0R6mY7AHfqQv0wGP3J8rtsYIqQ+T +SCX8Ev2fQtzzxD72V7DX3WnRBnc0CkvSyqD/HMaMyRa+xMwyN2hzXwj7UfdJUzYF +CpUCTPJ5GhD22Dp1nPMd8aINcGeGG7MW9S/lpOt5hvk9C8JzC6WZrG/8Z7jlLwum +GCSNe9FINSkYQKyTYOGWhlC0elnYjyELn8+CkcY7v2vcB5G5l1YjqrZslMZIBjzk +zk6q5PYvCdxTby78dOs6Y5nCpqyJvKeyRKANihDjbPIky/qbn3BHLt4Ui9SyIAmW +omTxJBzcoTWcFbLUvFUufQb1nA5V9FrWk9p2rSVzTMVD +-----END CERTIFICATE----- diff --git a/src/future/backports/test/keycert.passwd.pem b/src/future/backports/test/keycert.passwd.pem new file mode 100644 index 00000000..e9057488 --- /dev/null +++ b/src/future/backports/test/keycert.passwd.pem @@ -0,0 +1,33 @@ +-----BEGIN RSA PRIVATE KEY----- +Proc-Type: 4,ENCRYPTED +DEK-Info: DES-EDE3-CBC,1A8D9D2A02EC698A + 
+kJYbfZ8L0sfe9Oty3gw0aloNnY5E8fegRfQLZlNoxTl6jNt0nIwI8kDJ36CZgR9c +u3FDJm/KqrfUoz8vW+qEnWhSG7QPX2wWGPHd4K94Yz/FgrRzZ0DoK7XxXq9gOtVA +AVGQhnz32p+6WhfGsCr9ArXEwRZrTk/FvzEPaU5fHcoSkrNVAGX8IpSVkSDwEDQr +Gv17+cfk99UV1OCza6yKHoFkTtrC+PZU71LomBabivS2Oc4B9hYuSR2hF01wTHP+ +YlWNagZOOVtNz4oKK9x9eNQpmfQXQvPPTfusexKIbKfZrMvJoxcm1gfcZ0H/wK6P +6wmXSG35qMOOztCZNtperjs1wzEBXznyK8QmLcAJBjkfarABJX9vBEzZV0OUKhy+ +noORFwHTllphbmydLhu6ehLUZMHPhzAS5UN7srtpSN81eerDMy0RMUAwA7/PofX1 +94Me85Q8jP0PC9ETdsJcPqLzAPETEYu0ELewKRcrdyWi+tlLFrpE5KT/s5ecbl9l +7B61U4Kfd1PIXc/siINhU3A3bYK+845YyUArUOnKf1kEox7p1RpD7yFqVT04lRTo +cibNKATBusXSuBrp2G6GNuhWEOSafWCKJQAzgCYIp6ZTV2khhMUGppc/2H3CF6cO +zX0KtlPVZC7hLkB6HT8SxYUwF1zqWY7+/XPPdc37MeEZ87Q3UuZwqORLY+Z0hpgt +L5JXBCoklZhCAaN2GqwFLXtGiRSRFGY7xXIhbDTlE65Wv1WGGgDLMKGE1gOz3yAo +2jjG1+yAHJUdE69XTFHSqSkvaloA1W03LdMXZ9VuQJ/ySXCie6ABAQ== +-----END RSA PRIVATE KEY----- +-----BEGIN CERTIFICATE----- +MIICVDCCAb2gAwIBAgIJANfHOBkZr8JOMA0GCSqGSIb3DQEBBQUAMF8xCzAJBgNV +BAYTAlhZMRcwFQYDVQQHEw5DYXN0bGUgQW50aHJheDEjMCEGA1UEChMaUHl0aG9u +IFNvZnR3YXJlIEZvdW5kYXRpb24xEjAQBgNVBAMTCWxvY2FsaG9zdDAeFw0xMDEw +MDgyMzAxNTZaFw0yMDEwMDUyMzAxNTZaMF8xCzAJBgNVBAYTAlhZMRcwFQYDVQQH +Ew5DYXN0bGUgQW50aHJheDEjMCEGA1UEChMaUHl0aG9uIFNvZnR3YXJlIEZvdW5k +YXRpb24xEjAQBgNVBAMTCWxvY2FsaG9zdDCBnzANBgkqhkiG9w0BAQEFAAOBjQAw +gYkCgYEA21vT5isq7F68amYuuNpSFlKDPrMUCa4YWYqZRt2OZ+/3NKaZ2xAiSwr7 +6MrQF70t5nLbSPpqE5+5VrS58SY+g/sXLiFd6AplH1wJZwh78DofbFYXUggktFMt +pTyiX8jtP66bkcPkDADA089RI1TQR6Ca+n7HFa7c1fabVV6i3zkCAwEAAaMYMBYw +FAYDVR0RBA0wC4IJbG9jYWxob3N0MA0GCSqGSIb3DQEBBQUAA4GBAHPctQBEQ4wd +BJ6+JcpIraopLn8BGhbjNWj40mmRqWB/NAWF6M5ne7KpGAu7tLeG4hb1zLaldK8G +lxy2GPSRF6LFS48dpEj2HbMv2nvv6xxalDMJ9+DicWgAKTQ6bcX2j3GUkCR0g/T1 +CRlNBAAlvhKzO7Clpf9l0YKBEfraJByX +-----END CERTIFICATE----- diff --git a/src/future/backports/test/keycert.pem b/src/future/backports/test/keycert.pem new file mode 100644 index 00000000..64318aa2 --- /dev/null +++ b/src/future/backports/test/keycert.pem @@ -0,0 +1,31 @@ +-----BEGIN PRIVATE 
KEY----- +MIICdwIBADANBgkqhkiG9w0BAQEFAASCAmEwggJdAgEAAoGBANtb0+YrKuxevGpm +LrjaUhZSgz6zFAmuGFmKmUbdjmfv9zSmmdsQIksK++jK0Be9LeZy20j6ahOfuVa0 +ufEmPoP7Fy4hXegKZR9cCWcIe/A6H2xWF1IIJLRTLaU8ol/I7T+um5HD5AwAwNPP +USNU0Eegmvp+xxWu3NX2m1Veot85AgMBAAECgYA3ZdZ673X0oexFlq7AAmrutkHt +CL7LvwrpOiaBjhyTxTeSNWzvtQBkIU8DOI0bIazA4UreAFffwtvEuPmonDb3F+Iq +SMAu42XcGyVZEl+gHlTPU9XRX7nTOXVt+MlRRRxL6t9GkGfUAXI3XxJDXW3c0vBK +UL9xqD8cORXOfE06rQJBAP8mEX1ERkR64Ptsoe4281vjTlNfIbs7NMPkUnrn9N/Y +BLhjNIfQ3HFZG8BTMLfX7kCS9D593DW5tV4Z9BP/c6cCQQDcFzCcVArNh2JSywOQ +ZfTfRbJg/Z5Lt9Fkngv1meeGNPgIMLN8Sg679pAOOWmzdMO3V706rNPzSVMME7E5 +oPIfAkEA8pDddarP5tCvTTgUpmTFbakm0KoTZm2+FzHcnA4jRh+XNTjTOv98Y6Ik +eO5d1ZnKXseWvkZncQgxfdnMqqpj5wJAcNq/RVne1DbYlwWchT2Si65MYmmJ8t+F +0mcsULqjOnEMwf5e+ptq5LzwbyrHZYq5FNk7ocufPv/ZQrcSSC+cFwJBAKvOJByS +x56qyGeZLOQlWS2JS3KJo59XuLFGqcbgN9Om9xFa41Yb4N9NvplFivsvZdw3m1Q/ +SPIXQuT8RMPDVNQ= +-----END PRIVATE KEY----- +-----BEGIN CERTIFICATE----- +MIICVDCCAb2gAwIBAgIJANfHOBkZr8JOMA0GCSqGSIb3DQEBBQUAMF8xCzAJBgNV +BAYTAlhZMRcwFQYDVQQHEw5DYXN0bGUgQW50aHJheDEjMCEGA1UEChMaUHl0aG9u +IFNvZnR3YXJlIEZvdW5kYXRpb24xEjAQBgNVBAMTCWxvY2FsaG9zdDAeFw0xMDEw +MDgyMzAxNTZaFw0yMDEwMDUyMzAxNTZaMF8xCzAJBgNVBAYTAlhZMRcwFQYDVQQH +Ew5DYXN0bGUgQW50aHJheDEjMCEGA1UEChMaUHl0aG9uIFNvZnR3YXJlIEZvdW5k +YXRpb24xEjAQBgNVBAMTCWxvY2FsaG9zdDCBnzANBgkqhkiG9w0BAQEFAAOBjQAw +gYkCgYEA21vT5isq7F68amYuuNpSFlKDPrMUCa4YWYqZRt2OZ+/3NKaZ2xAiSwr7 +6MrQF70t5nLbSPpqE5+5VrS58SY+g/sXLiFd6AplH1wJZwh78DofbFYXUggktFMt +pTyiX8jtP66bkcPkDADA089RI1TQR6Ca+n7HFa7c1fabVV6i3zkCAwEAAaMYMBYw +FAYDVR0RBA0wC4IJbG9jYWxob3N0MA0GCSqGSIb3DQEBBQUAA4GBAHPctQBEQ4wd +BJ6+JcpIraopLn8BGhbjNWj40mmRqWB/NAWF6M5ne7KpGAu7tLeG4hb1zLaldK8G +lxy2GPSRF6LFS48dpEj2HbMv2nvv6xxalDMJ9+DicWgAKTQ6bcX2j3GUkCR0g/T1 +CRlNBAAlvhKzO7Clpf9l0YKBEfraJByX +-----END CERTIFICATE----- diff --git a/src/future/backports/test/keycert2.pem b/src/future/backports/test/keycert2.pem new file mode 100644 index 00000000..e8a9e082 --- /dev/null +++ b/src/future/backports/test/keycert2.pem @@ -0,0 
+1,31 @@ +-----BEGIN PRIVATE KEY----- +MIICdwIBADANBgkqhkiG9w0BAQEFAASCAmEwggJdAgEAAoGBAJnsJZVrppL+W5I9 +zGQrrawWwE5QJpBK9nWw17mXrZ03R1cD9BamLGivVISbPlRlAVnZBEyh1ATpsB7d +CUQ+WHEvALquvx4+Yw5l+fXeiYRjrLRBYZuVy8yNtXzU3iWcGObcYRkUdiXdOyP7 +sLF2YZHRvQZpzgDBKkrraeQ81w21AgMBAAECgYBEm7n07FMHWlE+0kT0sXNsLYfy +YE+QKZnJw9WkaDN+zFEEPELkhZVt5BjsMraJr6v2fIEqF0gGGJPkbenffVq2B5dC +lWUOxvJHufMK4sM3Cp6s/gOp3LP+QkzVnvJSfAyZU6l+4PGX5pLdUsXYjPxgzjzL +S36tF7/2Uv1WePyLUQJBAMsPhYzUXOPRgmbhcJiqi9A9c3GO8kvSDYTCKt3VMnqz +HBn6MQ4VQasCD1F+7jWTI0FU/3vdw8non/Fj8hhYqZcCQQDCDRdvmZqDiZnpMqDq +L6ZSrLTVtMvZXZbgwForaAD9uHj51TME7+eYT7EG2YCgJTXJ4YvRJEnPNyskwdKt +vTSTAkEAtaaN/vyemEJ82BIGStwONNw0ILsSr5cZ9tBHzqiA/tipY+e36HRFiXhP +QcU9zXlxyWkDH8iz9DSAmE2jbfoqwwJANlMJ65E543cjIlitGcKLMnvtCCLcKpb7 +xSG0XJB6Lo11OKPJ66jp0gcFTSCY1Lx2CXVd+gfJrfwI1Pp562+bhwJBAJ9IfDPU +R8OpO9v1SGd8x33Owm7uXOpB9d63/T70AD1QOXjKUC4eXYbt0WWfWuny/RNPRuyh +w7DXSfUF+kPKolU= +-----END PRIVATE KEY----- +-----BEGIN CERTIFICATE----- +MIICXTCCAcagAwIBAgIJAIO3upAG445fMA0GCSqGSIb3DQEBBQUAMGIxCzAJBgNV +BAYTAlhZMRcwFQYDVQQHEw5DYXN0bGUgQW50aHJheDEjMCEGA1UEChMaUHl0aG9u +IFNvZnR3YXJlIEZvdW5kYXRpb24xFTATBgNVBAMTDGZha2Vob3N0bmFtZTAeFw0x +MDEwMDkxNTAxMDBaFw0yMDEwMDYxNTAxMDBaMGIxCzAJBgNVBAYTAlhZMRcwFQYD +VQQHEw5DYXN0bGUgQW50aHJheDEjMCEGA1UEChMaUHl0aG9uIFNvZnR3YXJlIEZv +dW5kYXRpb24xFTATBgNVBAMTDGZha2Vob3N0bmFtZTCBnzANBgkqhkiG9w0BAQEF +AAOBjQAwgYkCgYEAmewllWumkv5bkj3MZCutrBbATlAmkEr2dbDXuZetnTdHVwP0 +FqYsaK9UhJs+VGUBWdkETKHUBOmwHt0JRD5YcS8Auq6/Hj5jDmX59d6JhGOstEFh +m5XLzI21fNTeJZwY5txhGRR2Jd07I/uwsXZhkdG9BmnOAMEqSutp5DzXDbUCAwEA +AaMbMBkwFwYDVR0RBBAwDoIMZmFrZWhvc3RuYW1lMA0GCSqGSIb3DQEBBQUAA4GB +AH+iMClLLGSaKWgwXsmdVo4FhTZZHo8Uprrtg3N9FxEeE50btpDVQysgRt5ias3K +m+bME9zbKwvbVWD5zZdjus4pDgzwF/iHyccL8JyYhxOvS/9zmvAtFXj/APIIbZFp +IT75d9f88ScIGEtknZQejnrdhB64tYki/EqluiuKBqKD +-----END CERTIFICATE----- diff --git a/src/future/backports/test/nokia.pem b/src/future/backports/test/nokia.pem new file mode 100644 index 00000000..0d044df4 --- /dev/null +++ 
b/src/future/backports/test/nokia.pem @@ -0,0 +1,31 @@ +# Certificate for projects.developer.nokia.com:443 (see issue 13034) +-----BEGIN CERTIFICATE----- +MIIFLDCCBBSgAwIBAgIQLubqdkCgdc7lAF9NfHlUmjANBgkqhkiG9w0BAQUFADCB +vDELMAkGA1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMR8wHQYDVQQL +ExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMTswOQYDVQQLEzJUZXJtcyBvZiB1c2Ug +YXQgaHR0cHM6Ly93d3cudmVyaXNpZ24uY29tL3JwYSAoYykxMDE2MDQGA1UEAxMt +VmVyaVNpZ24gQ2xhc3MgMyBJbnRlcm5hdGlvbmFsIFNlcnZlciBDQSAtIEczMB4X +DTExMDkyMTAwMDAwMFoXDTEyMDkyMDIzNTk1OVowcTELMAkGA1UEBhMCRkkxDjAM +BgNVBAgTBUVzcG9vMQ4wDAYDVQQHFAVFc3BvbzEOMAwGA1UEChQFTm9raWExCzAJ +BgNVBAsUAkJJMSUwIwYDVQQDFBxwcm9qZWN0cy5kZXZlbG9wZXIubm9raWEuY29t +MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCr92w1bpHYSYxUEx8N/8Iddda2 +lYi+aXNtQfV/l2Fw9Ykv3Ipw4nLeGTj18FFlAZgMdPRlgrzF/NNXGw/9l3/qKdow +CypkQf8lLaxb9Ze1E/KKmkRJa48QTOqvo6GqKuTI6HCeGlG1RxDb8YSKcQWLiytn +yj3Wp4MgRQO266xmMQIDAQABo4IB9jCCAfIwQQYDVR0RBDowOIIccHJvamVjdHMu +ZGV2ZWxvcGVyLm5va2lhLmNvbYIYcHJvamVjdHMuZm9ydW0ubm9raWEuY29tMAkG +A1UdEwQCMAAwCwYDVR0PBAQDAgWgMEEGA1UdHwQ6MDgwNqA0oDKGMGh0dHA6Ly9T +VlJJbnRsLUczLWNybC52ZXJpc2lnbi5jb20vU1ZSSW50bEczLmNybDBEBgNVHSAE +PTA7MDkGC2CGSAGG+EUBBxcDMCowKAYIKwYBBQUHAgEWHGh0dHBzOi8vd3d3LnZl +cmlzaWduLmNvbS9ycGEwKAYDVR0lBCEwHwYJYIZIAYb4QgQBBggrBgEFBQcDAQYI +KwYBBQUHAwIwcgYIKwYBBQUHAQEEZjBkMCQGCCsGAQUFBzABhhhodHRwOi8vb2Nz +cC52ZXJpc2lnbi5jb20wPAYIKwYBBQUHMAKGMGh0dHA6Ly9TVlJJbnRsLUczLWFp +YS52ZXJpc2lnbi5jb20vU1ZSSW50bEczLmNlcjBuBggrBgEFBQcBDARiMGChXqBc +MFowWDBWFglpbWFnZS9naWYwITAfMAcGBSsOAwIaBBRLa7kolgYMu9BSOJsprEsH +iyEFGDAmFiRodHRwOi8vbG9nby52ZXJpc2lnbi5jb20vdnNsb2dvMS5naWYwDQYJ +KoZIhvcNAQEFBQADggEBACQuPyIJqXwUyFRWw9x5yDXgMW4zYFopQYOw/ItRY522 +O5BsySTh56BWS6mQB07XVfxmYUGAvRQDA5QHpmY8jIlNwSmN3s8RKo+fAtiNRlcL +x/mWSfuMs3D/S6ev3D6+dpEMZtjrhOdctsarMKp8n/hPbwhAbg5hVjpkW5n8vz2y +0KxvvkA1AxpLwpVv7OlK17ttzIHw8bp9HTlHBU5s8bKz4a565V/a5HI0CSEv/+0y +ko4/ghTnZc1CkmUngKKeFMSah/mT/xAh8XnE2l1AazFa8UKuYki1e+ArHaGZc4ix +UYOtiRphwfuYQhRZ7qX9q2MMkCMI65XNK/SaFrAbbG0= +-----END 
CERTIFICATE----- diff --git a/src/future/backports/test/nullbytecert.pem b/src/future/backports/test/nullbytecert.pem new file mode 100644 index 00000000..447186c9 --- /dev/null +++ b/src/future/backports/test/nullbytecert.pem @@ -0,0 +1,90 @@ +Certificate: + Data: + Version: 3 (0x2) + Serial Number: 0 (0x0) + Signature Algorithm: sha1WithRSAEncryption + Issuer: C=US, ST=Oregon, L=Beaverton, O=Python Software Foundation, OU=Python Core Development, CN=null.python.org\x00example.org/emailAddress=python-dev@python.org + Validity + Not Before: Aug 7 13:11:52 2013 GMT + Not After : Aug 7 13:12:52 2013 GMT + Subject: C=US, ST=Oregon, L=Beaverton, O=Python Software Foundation, OU=Python Core Development, CN=null.python.org\x00example.org/emailAddress=python-dev@python.org + Subject Public Key Info: + Public Key Algorithm: rsaEncryption + Public-Key: (2048 bit) + Modulus: + 00:b5:ea:ed:c9:fb:46:7d:6f:3b:76:80:dd:3a:f3: + 03:94:0b:a7:a6:db:ec:1d:df:ff:23:74:08:9d:97: + 16:3f:a3:a4:7b:3e:1b:0e:96:59:25:03:a7:26:e2: + 88:a9:cf:79:cd:f7:04:56:b0:ab:79:32:6e:59:c1: + 32:30:54:eb:58:a8:cb:91:f0:42:a5:64:27:cb:d4: + 56:31:88:52:ad:cf:bd:7f:f0:06:64:1f:cc:27:b8: + a3:8b:8c:f3:d8:29:1f:25:0b:f5:46:06:1b:ca:02: + 45:ad:7b:76:0a:9c:bf:bb:b9:ae:0d:16:ab:60:75: + ae:06:3e:9c:7c:31:dc:92:2f:29:1a:e0:4b:0c:91: + 90:6c:e9:37:c5:90:d7:2a:d7:97:15:a3:80:8f:5d: + 7b:49:8f:54:30:d4:97:2c:1c:5b:37:b5:ab:69:30: + 68:43:d3:33:78:4b:02:60:f5:3c:44:80:a1:8f:e7: + f0:0f:d1:5e:87:9e:46:cf:62:fc:f9:bf:0c:65:12: + f1:93:c8:35:79:3f:c8:ec:ec:47:f5:ef:be:44:d5: + ae:82:1e:2d:9a:9f:98:5a:67:65:e1:74:70:7c:cb: + d3:c2:ce:0e:45:49:27:dc:e3:2d:d4:fb:48:0e:2f: + 9e:77:b8:14:46:c0:c4:36:ca:02:ae:6a:91:8c:da: + 2f:85 + Exponent: 65537 (0x10001) + X509v3 extensions: + X509v3 Basic Constraints: critical + CA:FALSE + X509v3 Subject Key Identifier: + 88:5A:55:C0:52:FF:61:CD:52:A3:35:0F:EA:5A:9C:24:38:22:F7:5C + X509v3 Key Usage: + Digital Signature, Non Repudiation, Key Encipherment + X509v3 Subject Alternative 
Name: + ************************************************************* + WARNING: The values for DNS, email and URI are WRONG. OpenSSL + doesn't print the text after a NULL byte. + ************************************************************* + DNS:altnull.python.org, email:null@python.org, URI:http://null.python.org, IP Address:192.0.2.1, IP Address:2001:DB8:0:0:0:0:0:1 + Signature Algorithm: sha1WithRSAEncryption + ac:4f:45:ef:7d:49:a8:21:70:8e:88:59:3e:d4:36:42:70:f5: + a3:bd:8b:d7:a8:d0:58:f6:31:4a:b1:a4:a6:dd:6f:d9:e8:44: + 3c:b6:0a:71:d6:7f:b1:08:61:9d:60:ce:75:cf:77:0c:d2:37: + 86:02:8d:5e:5d:f9:0f:71:b4:16:a8:c1:3d:23:1c:f1:11:b3: + 56:6e:ca:d0:8d:34:94:e6:87:2a:99:f2:ae:ae:cc:c2:e8:86: + de:08:a8:7f:c5:05:fa:6f:81:a7:82:e6:d0:53:9d:34:f4:ac: + 3e:40:fe:89:57:7a:29:a4:91:7e:0b:c6:51:31:e5:10:2f:a4: + 60:76:cd:95:51:1a:be:8b:a1:b0:fd:ad:52:bd:d7:1b:87:60: + d2:31:c7:17:c4:18:4f:2d:08:25:a3:a7:4f:b7:92:ca:e2:f5: + 25:f1:54:75:81:9d:b3:3d:61:a2:f7:da:ed:e1:c6:6f:2c:60: + 1f:d8:6f:c5:92:05:ab:c9:09:62:49:a9:14:ad:55:11:cc:d6: + 4a:19:94:99:97:37:1d:81:5f:8b:cf:a3:a8:96:44:51:08:3d: + 0b:05:65:12:eb:b6:70:80:88:48:72:4f:c6:c2:da:cf:cd:8e: + 5b:ba:97:2f:60:b4:96:56:49:5e:3a:43:76:63:04:be:2a:f6: + c1:ca:a9:94 +-----BEGIN CERTIFICATE----- +MIIE2DCCA8CgAwIBAgIBADANBgkqhkiG9w0BAQUFADCBxTELMAkGA1UEBhMCVVMx +DzANBgNVBAgMBk9yZWdvbjESMBAGA1UEBwwJQmVhdmVydG9uMSMwIQYDVQQKDBpQ +eXRob24gU29mdHdhcmUgRm91bmRhdGlvbjEgMB4GA1UECwwXUHl0aG9uIENvcmUg +RGV2ZWxvcG1lbnQxJDAiBgNVBAMMG251bGwucHl0aG9uLm9yZwBleGFtcGxlLm9y +ZzEkMCIGCSqGSIb3DQEJARYVcHl0aG9uLWRldkBweXRob24ub3JnMB4XDTEzMDgw +NzEzMTE1MloXDTEzMDgwNzEzMTI1MlowgcUxCzAJBgNVBAYTAlVTMQ8wDQYDVQQI +DAZPcmVnb24xEjAQBgNVBAcMCUJlYXZlcnRvbjEjMCEGA1UECgwaUHl0aG9uIFNv +ZnR3YXJlIEZvdW5kYXRpb24xIDAeBgNVBAsMF1B5dGhvbiBDb3JlIERldmVsb3Bt +ZW50MSQwIgYDVQQDDBtudWxsLnB5dGhvbi5vcmcAZXhhbXBsZS5vcmcxJDAiBgkq +hkiG9w0BCQEWFXB5dGhvbi1kZXZAcHl0aG9uLm9yZzCCASIwDQYJKoZIhvcNAQEB +BQADggEPADCCAQoCggEBALXq7cn7Rn1vO3aA3TrzA5QLp6bb7B3f/yN0CJ2XFj+j 
+pHs+Gw6WWSUDpybiiKnPec33BFawq3kyblnBMjBU61ioy5HwQqVkJ8vUVjGIUq3P +vX/wBmQfzCe4o4uM89gpHyUL9UYGG8oCRa17dgqcv7u5rg0Wq2B1rgY+nHwx3JIv +KRrgSwyRkGzpN8WQ1yrXlxWjgI9de0mPVDDUlywcWze1q2kwaEPTM3hLAmD1PESA +oY/n8A/RXoeeRs9i/Pm/DGUS8ZPINXk/yOzsR/XvvkTVroIeLZqfmFpnZeF0cHzL +08LODkVJJ9zjLdT7SA4vnne4FEbAxDbKAq5qkYzaL4UCAwEAAaOB0DCBzTAMBgNV +HRMBAf8EAjAAMB0GA1UdDgQWBBSIWlXAUv9hzVKjNQ/qWpwkOCL3XDALBgNVHQ8E +BAMCBeAwgZAGA1UdEQSBiDCBhYIeYWx0bnVsbC5weXRob24ub3JnAGV4YW1wbGUu +Y29tgSBudWxsQHB5dGhvbi5vcmcAdXNlckBleGFtcGxlLm9yZ4YpaHR0cDovL251 +bGwucHl0aG9uLm9yZwBodHRwOi8vZXhhbXBsZS5vcmeHBMAAAgGHECABDbgAAAAA +AAAAAAAAAAEwDQYJKoZIhvcNAQEFBQADggEBAKxPRe99SaghcI6IWT7UNkJw9aO9 +i9eo0Fj2MUqxpKbdb9noRDy2CnHWf7EIYZ1gznXPdwzSN4YCjV5d+Q9xtBaowT0j +HPERs1ZuytCNNJTmhyqZ8q6uzMLoht4IqH/FBfpvgaeC5tBTnTT0rD5A/olXeimk +kX4LxlEx5RAvpGB2zZVRGr6LobD9rVK91xuHYNIxxxfEGE8tCCWjp0+3ksri9SXx +VHWBnbM9YaL32u3hxm8sYB/Yb8WSBavJCWJJqRStVRHM1koZlJmXNx2BX4vPo6iW +RFEIPQsFZRLrtnCAiEhyT8bC2s/Njlu6ly9gtJZWSV46Q3ZjBL4q9sHKqZQ= +-----END CERTIFICATE----- diff --git a/libfuturize/test_scripts/py3/emptymodule.py b/src/future/backports/test/nullcert.pem similarity index 100% rename from libfuturize/test_scripts/py3/emptymodule.py rename to src/future/backports/test/nullcert.pem diff --git a/future/standard_library/test/pystone.py b/src/future/backports/test/pystone.py similarity index 98% rename from future/standard_library/test/pystone.py rename to src/future/backports/test/pystone.py index 02e9a171..7652027b 100755 --- a/future/standard_library/test/pystone.py +++ b/src/future/backports/test/pystone.py @@ -1,4 +1,4 @@ -#! 
/usr/bin/python2.7 +#!/usr/bin/env python3 """ "PYSTONE" Benchmark Program @@ -40,7 +40,7 @@ __version__ = "1.1" -[Ident1, Ident2, Ident3, Ident4, Ident5] = list(range(1, 6)) +[Ident1, Ident2, Ident3, Ident4, Ident5] = range(1, 6) class Record(object): diff --git a/src/future/backports/test/sha256.pem b/src/future/backports/test/sha256.pem new file mode 100644 index 00000000..d3db4b85 --- /dev/null +++ b/src/future/backports/test/sha256.pem @@ -0,0 +1,128 @@ +# Certificate chain for https://sha256.tbs-internet.com + 0 s:/C=FR/postalCode=14000/ST=Calvados/L=CAEN/street=22 rue de Bretagne/O=TBS INTERNET/OU=0002 440443810/OU=sha-256 production/CN=sha256.tbs-internet.com + i:/C=FR/ST=Calvados/L=Caen/O=TBS INTERNET/OU=Terms and Conditions: http://www.tbs-internet.com/CA/repository/OU=TBS INTERNET CA/CN=TBS X509 CA SGC +-----BEGIN CERTIFICATE----- +MIIGXDCCBUSgAwIBAgIRAKpVmHgg9nfCodAVwcP4siwwDQYJKoZIhvcNAQELBQAw +gcQxCzAJBgNVBAYTAkZSMREwDwYDVQQIEwhDYWx2YWRvczENMAsGA1UEBxMEQ2Fl +bjEVMBMGA1UEChMMVEJTIElOVEVSTkVUMUgwRgYDVQQLEz9UZXJtcyBhbmQgQ29u +ZGl0aW9uczogaHR0cDovL3d3dy50YnMtaW50ZXJuZXQuY29tL0NBL3JlcG9zaXRv +cnkxGDAWBgNVBAsTD1RCUyBJTlRFUk5FVCBDQTEYMBYGA1UEAxMPVEJTIFg1MDkg +Q0EgU0dDMB4XDTEyMDEwNDAwMDAwMFoXDTE0MDIxNzIzNTk1OVowgcsxCzAJBgNV +BAYTAkZSMQ4wDAYDVQQREwUxNDAwMDERMA8GA1UECBMIQ2FsdmFkb3MxDTALBgNV +BAcTBENBRU4xGzAZBgNVBAkTEjIyIHJ1ZSBkZSBCcmV0YWduZTEVMBMGA1UEChMM +VEJTIElOVEVSTkVUMRcwFQYDVQQLEw4wMDAyIDQ0MDQ0MzgxMDEbMBkGA1UECxMS +c2hhLTI1NiBwcm9kdWN0aW9uMSAwHgYDVQQDExdzaGEyNTYudGJzLWludGVybmV0 +LmNvbTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAKQIX/zdJcyxty0m +PM1XQSoSSifueS3AVcgqMsaIKS/u+rYzsv4hQ/qA6vLn5m5/ewUcZDj7zdi6rBVf +PaVNXJ6YinLX0tkaW8TEjeVuZG5yksGZlhCt1CJ1Ho9XLiLaP4uJ7MCoNUntpJ+E +LfrOdgsIj91kPmwjDJeztVcQCvKzhjVJA/KxdInc0JvOATn7rpaSmQI5bvIjufgo +qVsTPwVFzuUYULXBk7KxRT7MiEqnd5HvviNh0285QC478zl3v0I0Fb5El4yD3p49 +IthcRnxzMKc0UhU5ogi0SbONyBfm/mzONVfSxpM+MlyvZmJqrbuuLoEDzJD+t8PU +xSuzgbcCAwEAAaOCAj4wggI6MB8GA1UdIwQYMBaAFAdEdoWTKLx/bXjSCuv6TEvf 
+2YIfMB0GA1UdDgQWBBT/qTGYdaj+f61c2IRFL/B1eEsM8DAOBgNVHQ8BAf8EBAMC +BaAwDAYDVR0TAQH/BAIwADA0BgNVHSUELTArBggrBgEFBQcDAQYIKwYBBQUHAwIG +CisGAQQBgjcKAwMGCWCGSAGG+EIEATBLBgNVHSAERDBCMEAGCisGAQQB5TcCBAEw +MjAwBggrBgEFBQcCARYkaHR0cHM6Ly93d3cudGJzLWludGVybmV0LmNvbS9DQS9D +UFM0MG0GA1UdHwRmMGQwMqAwoC6GLGh0dHA6Ly9jcmwudGJzLWludGVybmV0LmNv +bS9UQlNYNTA5Q0FTR0MuY3JsMC6gLKAqhihodHRwOi8vY3JsLnRicy14NTA5LmNv +bS9UQlNYNTA5Q0FTR0MuY3JsMIGmBggrBgEFBQcBAQSBmTCBljA4BggrBgEFBQcw +AoYsaHR0cDovL2NydC50YnMtaW50ZXJuZXQuY29tL1RCU1g1MDlDQVNHQy5jcnQw +NAYIKwYBBQUHMAKGKGh0dHA6Ly9jcnQudGJzLXg1MDkuY29tL1RCU1g1MDlDQVNH +Qy5jcnQwJAYIKwYBBQUHMAGGGGh0dHA6Ly9vY3NwLnRicy14NTA5LmNvbTA/BgNV +HREEODA2ghdzaGEyNTYudGJzLWludGVybmV0LmNvbYIbd3d3LnNoYTI1Ni50YnMt +aW50ZXJuZXQuY29tMA0GCSqGSIb3DQEBCwUAA4IBAQA0pOuL8QvAa5yksTbGShzX +ABApagunUGoEydv4YJT1MXy9tTp7DrWaozZSlsqBxrYAXP1d9r2fuKbEniYHxaQ0 +UYaf1VSIlDo1yuC8wE7wxbHDIpQ/E5KAyxiaJ8obtDhFstWAPAH+UoGXq0kj2teN +21sFQ5dXgA95nldvVFsFhrRUNB6xXAcaj0VZFhttI0ZfQZmQwEI/P+N9Jr40OGun +aa+Dn0TMeUH4U20YntfLbu2nDcJcYfyurm+8/0Tr4HznLnedXu9pCPYj0TaddrgT +XO0oFiyy7qGaY6+qKh71yD64Y3ycCJ/HR9Wm39mjZYc9ezYwT4noP6r7Lk8YO7/q +-----END CERTIFICATE----- + 1 s:/C=FR/ST=Calvados/L=Caen/O=TBS INTERNET/OU=Terms and Conditions: http://www.tbs-internet.com/CA/repository/OU=TBS INTERNET CA/CN=TBS X509 CA SGC + i:/C=SE/O=AddTrust AB/OU=AddTrust External TTP Network/CN=AddTrust External CA Root +-----BEGIN CERTIFICATE----- +MIIFVjCCBD6gAwIBAgIQXpDZ0ETJMV02WTx3GTnhhTANBgkqhkiG9w0BAQUFADBv +MQswCQYDVQQGEwJTRTEUMBIGA1UEChMLQWRkVHJ1c3QgQUIxJjAkBgNVBAsTHUFk +ZFRydXN0IEV4dGVybmFsIFRUUCBOZXR3b3JrMSIwIAYDVQQDExlBZGRUcnVzdCBF +eHRlcm5hbCBDQSBSb290MB4XDTA1MTIwMTAwMDAwMFoXDTE5MDYyNDE5MDYzMFow +gcQxCzAJBgNVBAYTAkZSMREwDwYDVQQIEwhDYWx2YWRvczENMAsGA1UEBxMEQ2Fl +bjEVMBMGA1UEChMMVEJTIElOVEVSTkVUMUgwRgYDVQQLEz9UZXJtcyBhbmQgQ29u +ZGl0aW9uczogaHR0cDovL3d3dy50YnMtaW50ZXJuZXQuY29tL0NBL3JlcG9zaXRv +cnkxGDAWBgNVBAsTD1RCUyBJTlRFUk5FVCBDQTEYMBYGA1UEAxMPVEJTIFg1MDkg 
+Q0EgU0dDMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAsgOkO3f7wzN6 +rOjg45tR5vjBfzK7qmV9IBxb/QW9EEXxG+E7FNhZqQLtwGBKoSsHTnQqV75wWMk0 +9tinWvftBkSpj5sTi/8cbzJfUvTSVYh3Qxv6AVVjMMH/ruLjE6y+4PoaPs8WoYAQ +ts5R4Z1g8c/WnTepLst2x0/Wv7GmuoQi+gXvHU6YrBiu7XkeYhzc95QdviWSJRDk +owhb5K43qhcvjRmBfO/paGlCliDGZp8mHwrI21mwobWpVjTxZRwYO3bd4+TGcI4G +Ie5wmHwE8F7SK1tgSqbBacKjDa93j7txKkfz/Yd2n7TGqOXiHPsJpG655vrKtnXk +9vs1zoDeJQIDAQABo4IBljCCAZIwHQYDVR0OBBYEFAdEdoWTKLx/bXjSCuv6TEvf +2YIfMA4GA1UdDwEB/wQEAwIBBjASBgNVHRMBAf8ECDAGAQH/AgEAMCAGA1UdJQQZ +MBcGCisGAQQBgjcKAwMGCWCGSAGG+EIEATAYBgNVHSAEETAPMA0GCysGAQQBgOU3 +AgQBMHsGA1UdHwR0MHIwOKA2oDSGMmh0dHA6Ly9jcmwuY29tb2RvY2EuY29tL0Fk +ZFRydXN0RXh0ZXJuYWxDQVJvb3QuY3JsMDagNKAyhjBodHRwOi8vY3JsLmNvbW9k +by5uZXQvQWRkVHJ1c3RFeHRlcm5hbENBUm9vdC5jcmwwgYAGCCsGAQUFBwEBBHQw +cjA4BggrBgEFBQcwAoYsaHR0cDovL2NydC5jb21vZG9jYS5jb20vQWRkVHJ1c3RV +VE5TR0NDQS5jcnQwNgYIKwYBBQUHMAKGKmh0dHA6Ly9jcnQuY29tb2RvLm5ldC9B +ZGRUcnVzdFVUTlNHQ0NBLmNydDARBglghkgBhvhCAQEEBAMCAgQwDQYJKoZIhvcN +AQEFBQADggEBAK2zEzs+jcIrVK9oDkdDZNvhuBYTdCfpxfFs+OAujW0bIfJAy232 +euVsnJm6u/+OrqKudD2tad2BbejLLXhMZViaCmK7D9nrXHx4te5EP8rL19SUVqLY +1pTnv5dhNgEgvA7n5lIzDSYs7yRLsr7HJsYPr6SeYSuZizyX1SNz7ooJ32/F3X98 +RB0Mlc/E0OyOrkQ9/y5IrnpnaSora8CnUrV5XNOg+kyCz9edCyx4D5wXYcwZPVWz +8aDqquESrezPyjtfi4WRO4s/VD3HLZvOxzMrWAVYCDG9FxaOhF0QGuuG1F7F3GKV +v6prNyCl016kRl2j1UT+a7gLd8fA25A4C9E= +-----END CERTIFICATE----- + 2 s:/C=SE/O=AddTrust AB/OU=AddTrust External TTP Network/CN=AddTrust External CA Root + i:/C=US/ST=UT/L=Salt Lake City/O=The USERTRUST Network/OU=http://www.usertrust.com/CN=UTN - DATACorp SGC +-----BEGIN CERTIFICATE----- +MIIEZjCCA06gAwIBAgIQUSYKkxzif5zDpV954HKugjANBgkqhkiG9w0BAQUFADCB +kzELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAlVUMRcwFQYDVQQHEw5TYWx0IExha2Ug +Q2l0eTEeMBwGA1UEChMVVGhlIFVTRVJUUlVTVCBOZXR3b3JrMSEwHwYDVQQLExho +dHRwOi8vd3d3LnVzZXJ0cnVzdC5jb20xGzAZBgNVBAMTElVUTiAtIERBVEFDb3Jw +IFNHQzAeFw0wNTA2MDcwODA5MTBaFw0xOTA2MjQxOTA2MzBaMG8xCzAJBgNVBAYT 
+AlNFMRQwEgYDVQQKEwtBZGRUcnVzdCBBQjEmMCQGA1UECxMdQWRkVHJ1c3QgRXh0 +ZXJuYWwgVFRQIE5ldHdvcmsxIjAgBgNVBAMTGUFkZFRydXN0IEV4dGVybmFsIENB +IFJvb3QwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQC39xoz5vIABC05 +4E5b7R+8bA/Ntfojts7emxEzl6QpTH2Tn71KvJPtAxrjj8/lbVBa1pcplFqAsEl6 +2y6V/bjKvzc4LR4+kUGtcFbH8E8/6DKedMrIkFTpxl8PeJ2aQDwOrGGqXhSPnoeh +alDc15pOrwWzpnGUnHGzUGAKxxOdOAeGAqjpqGkmGJCrTLBPI6s6T4TY386f4Wlv +u9dC12tE5Met7m1BX3JacQg3s3llpFmglDf3AC8NwpJy2tA4ctsUqEXEXSp9t7TW +xO6szRNEt8kr3UMAJfphuWlqWCMRt6czj1Z1WfXNKddGtworZbbTQm8Vsrh7++/p +XVPVNFonAgMBAAGjgdgwgdUwHwYDVR0jBBgwFoAUUzLRs89/+uDxoF2FTpLSnkUd +tE8wHQYDVR0OBBYEFK29mHo0tCb3+sQmVO8DveAky1QaMA4GA1UdDwEB/wQEAwIB +BjAPBgNVHRMBAf8EBTADAQH/MBEGCWCGSAGG+EIBAQQEAwIBAjAgBgNVHSUEGTAX +BgorBgEEAYI3CgMDBglghkgBhvhCBAEwPQYDVR0fBDYwNDAyoDCgLoYsaHR0cDov +L2NybC51c2VydHJ1c3QuY29tL1VUTi1EQVRBQ29ycFNHQy5jcmwwDQYJKoZIhvcN +AQEFBQADggEBAMbuUxdoFLJRIh6QWA2U/b3xcOWGLcM2MY9USEbnLQg3vGwKYOEO +rVE04BKT6b64q7gmtOmWPSiPrmQH/uAB7MXjkesYoPF1ftsK5p+R26+udd8jkWjd +FwBaS/9kbHDrARrQkNnHptZt9hPk/7XJ0h4qy7ElQyZ42TCbTg0evmnv3+r+LbPM ++bDdtRTKkdSytaX7ARmjR3mfnYyVhzT4HziS2jamEfpr62vp3EV4FTkG101B5CHI +3C+H0be/SGB1pWLLJN47YaApIKa+xWycxOkKaSLvkTr6Jq/RW0GnOuL4OAdCq8Fb ++M5tug8EPzI0rNwEKNdwMBQmBsTkm5jVz3g= +-----END CERTIFICATE----- + 3 s:/C=US/ST=UT/L=Salt Lake City/O=The USERTRUST Network/OU=http://www.usertrust.com/CN=UTN - DATACorp SGC + i:/C=US/ST=UT/L=Salt Lake City/O=The USERTRUST Network/OU=http://www.usertrust.com/CN=UTN - DATACorp SGC +-----BEGIN CERTIFICATE----- +MIIEXjCCA0agAwIBAgIQRL4Mi1AAIbQR0ypoBqmtaTANBgkqhkiG9w0BAQUFADCB +kzELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAlVUMRcwFQYDVQQHEw5TYWx0IExha2Ug +Q2l0eTEeMBwGA1UEChMVVGhlIFVTRVJUUlVTVCBOZXR3b3JrMSEwHwYDVQQLExho +dHRwOi8vd3d3LnVzZXJ0cnVzdC5jb20xGzAZBgNVBAMTElVUTiAtIERBVEFDb3Jw +IFNHQzAeFw05OTA2MjQxODU3MjFaFw0xOTA2MjQxOTA2MzBaMIGTMQswCQYDVQQG +EwJVUzELMAkGA1UECBMCVVQxFzAVBgNVBAcTDlNhbHQgTGFrZSBDaXR5MR4wHAYD +VQQKExVUaGUgVVNFUlRSVVNUIE5ldHdvcmsxITAfBgNVBAsTGGh0dHA6Ly93d3cu 
+dXNlcnRydXN0LmNvbTEbMBkGA1UEAxMSVVROIC0gREFUQUNvcnAgU0dDMIIBIjAN +BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA3+5YEKIrblXEjr8uRgnn4AgPLit6 +E5Qbvfa2gI5lBZMAHryv4g+OGQ0SR+ysraP6LnD43m77VkIVni5c7yPeIbkFdicZ +D0/Ww5y0vpQZY/KmEQrrU0icvvIpOxboGqBMpsn0GFlowHDyUwDAXlCCpVZvNvlK +4ESGoE1O1kduSUrLZ9emxAW5jh70/P/N5zbgnAVssjMiFdC04MwXwLLA9P4yPykq +lXvY8qdOD1R8oQ2AswkDwf9c3V6aPryuvEeKaq5xyh+xKrhfQgUL7EYw0XILyulW +bfXv33i+Ybqypa4ETLyorGkVl73v67SMvzX41MPRKA5cOp9wGDMgd8SirwIDAQAB +o4GrMIGoMAsGA1UdDwQEAwIBxjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBRT +MtGzz3/64PGgXYVOktKeRR20TzA9BgNVHR8ENjA0MDKgMKAuhixodHRwOi8vY3Js +LnVzZXJ0cnVzdC5jb20vVVROLURBVEFDb3JwU0dDLmNybDAqBgNVHSUEIzAhBggr +BgEFBQcDAQYKKwYBBAGCNwoDAwYJYIZIAYb4QgQBMA0GCSqGSIb3DQEBBQUAA4IB +AQAnNZcAiosovcYzMB4p/OL31ZjUQLtgyr+rFywJNn9Q+kHcrpY6CiM+iVnJowft +Gzet/Hy+UUla3joKVAgWRcKZsYfNjGjgaQPpxE6YsjuMFrMOoAyYUJuTqXAJyCyj +j98C5OBxOvG0I3KgqgHf35g+FFCgMSa9KOlaMCZ1+XtgHI3zzVAmbQQnmt/VDUVH +KWss5nbZqSl9Mt3JNjy9rjXxEZ4du5A/EkdOjtd+D2JzHVImOBwYSf0wdJrE5SIv +2MCN7ZF6TACPcn9d2t0bi0Vr591pl6jFVkwPDPafepE39peC4N1xaf92P2BNPM/3 +mfnGV/TJVTl4uix5yaaIK/QI +-----END CERTIFICATE----- diff --git a/src/future/backports/test/ssl_cert.pem b/src/future/backports/test/ssl_cert.pem new file mode 100644 index 00000000..47a7d7e3 --- /dev/null +++ b/src/future/backports/test/ssl_cert.pem @@ -0,0 +1,15 @@ +-----BEGIN CERTIFICATE----- +MIICVDCCAb2gAwIBAgIJANfHOBkZr8JOMA0GCSqGSIb3DQEBBQUAMF8xCzAJBgNV +BAYTAlhZMRcwFQYDVQQHEw5DYXN0bGUgQW50aHJheDEjMCEGA1UEChMaUHl0aG9u +IFNvZnR3YXJlIEZvdW5kYXRpb24xEjAQBgNVBAMTCWxvY2FsaG9zdDAeFw0xMDEw +MDgyMzAxNTZaFw0yMDEwMDUyMzAxNTZaMF8xCzAJBgNVBAYTAlhZMRcwFQYDVQQH +Ew5DYXN0bGUgQW50aHJheDEjMCEGA1UEChMaUHl0aG9uIFNvZnR3YXJlIEZvdW5k +YXRpb24xEjAQBgNVBAMTCWxvY2FsaG9zdDCBnzANBgkqhkiG9w0BAQEFAAOBjQAw +gYkCgYEA21vT5isq7F68amYuuNpSFlKDPrMUCa4YWYqZRt2OZ+/3NKaZ2xAiSwr7 +6MrQF70t5nLbSPpqE5+5VrS58SY+g/sXLiFd6AplH1wJZwh78DofbFYXUggktFMt +pTyiX8jtP66bkcPkDADA089RI1TQR6Ca+n7HFa7c1fabVV6i3zkCAwEAAaMYMBYw 
+FAYDVR0RBA0wC4IJbG9jYWxob3N0MA0GCSqGSIb3DQEBBQUAA4GBAHPctQBEQ4wd +BJ6+JcpIraopLn8BGhbjNWj40mmRqWB/NAWF6M5ne7KpGAu7tLeG4hb1zLaldK8G +lxy2GPSRF6LFS48dpEj2HbMv2nvv6xxalDMJ9+DicWgAKTQ6bcX2j3GUkCR0g/T1 +CRlNBAAlvhKzO7Clpf9l0YKBEfraJByX +-----END CERTIFICATE----- diff --git a/src/future/backports/test/ssl_key.passwd.pem b/src/future/backports/test/ssl_key.passwd.pem new file mode 100644 index 00000000..2524672e --- /dev/null +++ b/src/future/backports/test/ssl_key.passwd.pem @@ -0,0 +1,18 @@ +-----BEGIN RSA PRIVATE KEY----- +Proc-Type: 4,ENCRYPTED +DEK-Info: DES-EDE3-CBC,1A8D9D2A02EC698A + +kJYbfZ8L0sfe9Oty3gw0aloNnY5E8fegRfQLZlNoxTl6jNt0nIwI8kDJ36CZgR9c +u3FDJm/KqrfUoz8vW+qEnWhSG7QPX2wWGPHd4K94Yz/FgrRzZ0DoK7XxXq9gOtVA +AVGQhnz32p+6WhfGsCr9ArXEwRZrTk/FvzEPaU5fHcoSkrNVAGX8IpSVkSDwEDQr +Gv17+cfk99UV1OCza6yKHoFkTtrC+PZU71LomBabivS2Oc4B9hYuSR2hF01wTHP+ +YlWNagZOOVtNz4oKK9x9eNQpmfQXQvPPTfusexKIbKfZrMvJoxcm1gfcZ0H/wK6P +6wmXSG35qMOOztCZNtperjs1wzEBXznyK8QmLcAJBjkfarABJX9vBEzZV0OUKhy+ +noORFwHTllphbmydLhu6ehLUZMHPhzAS5UN7srtpSN81eerDMy0RMUAwA7/PofX1 +94Me85Q8jP0PC9ETdsJcPqLzAPETEYu0ELewKRcrdyWi+tlLFrpE5KT/s5ecbl9l +7B61U4Kfd1PIXc/siINhU3A3bYK+845YyUArUOnKf1kEox7p1RpD7yFqVT04lRTo +cibNKATBusXSuBrp2G6GNuhWEOSafWCKJQAzgCYIp6ZTV2khhMUGppc/2H3CF6cO +zX0KtlPVZC7hLkB6HT8SxYUwF1zqWY7+/XPPdc37MeEZ87Q3UuZwqORLY+Z0hpgt +L5JXBCoklZhCAaN2GqwFLXtGiRSRFGY7xXIhbDTlE65Wv1WGGgDLMKGE1gOz3yAo +2jjG1+yAHJUdE69XTFHSqSkvaloA1W03LdMXZ9VuQJ/ySXCie6ABAQ== +-----END RSA PRIVATE KEY----- diff --git a/src/future/backports/test/ssl_key.pem b/src/future/backports/test/ssl_key.pem new file mode 100644 index 00000000..3fd3bbd5 --- /dev/null +++ b/src/future/backports/test/ssl_key.pem @@ -0,0 +1,16 @@ +-----BEGIN PRIVATE KEY----- +MIICdwIBADANBgkqhkiG9w0BAQEFAASCAmEwggJdAgEAAoGBANtb0+YrKuxevGpm +LrjaUhZSgz6zFAmuGFmKmUbdjmfv9zSmmdsQIksK++jK0Be9LeZy20j6ahOfuVa0 +ufEmPoP7Fy4hXegKZR9cCWcIe/A6H2xWF1IIJLRTLaU8ol/I7T+um5HD5AwAwNPP +USNU0Eegmvp+xxWu3NX2m1Veot85AgMBAAECgYA3ZdZ673X0oexFlq7AAmrutkHt 
+CL7LvwrpOiaBjhyTxTeSNWzvtQBkIU8DOI0bIazA4UreAFffwtvEuPmonDb3F+Iq +SMAu42XcGyVZEl+gHlTPU9XRX7nTOXVt+MlRRRxL6t9GkGfUAXI3XxJDXW3c0vBK +UL9xqD8cORXOfE06rQJBAP8mEX1ERkR64Ptsoe4281vjTlNfIbs7NMPkUnrn9N/Y +BLhjNIfQ3HFZG8BTMLfX7kCS9D593DW5tV4Z9BP/c6cCQQDcFzCcVArNh2JSywOQ +ZfTfRbJg/Z5Lt9Fkngv1meeGNPgIMLN8Sg679pAOOWmzdMO3V706rNPzSVMME7E5 +oPIfAkEA8pDddarP5tCvTTgUpmTFbakm0KoTZm2+FzHcnA4jRh+XNTjTOv98Y6Ik +eO5d1ZnKXseWvkZncQgxfdnMqqpj5wJAcNq/RVne1DbYlwWchT2Si65MYmmJ8t+F +0mcsULqjOnEMwf5e+ptq5LzwbyrHZYq5FNk7ocufPv/ZQrcSSC+cFwJBAKvOJByS +x56qyGeZLOQlWS2JS3KJo59XuLFGqcbgN9Om9xFa41Yb4N9NvplFivsvZdw3m1Q/ +SPIXQuT8RMPDVNQ= +-----END PRIVATE KEY----- diff --git a/src/future/backports/test/ssl_servers.py b/src/future/backports/test/ssl_servers.py new file mode 100644 index 00000000..87a3fb85 --- /dev/null +++ b/src/future/backports/test/ssl_servers.py @@ -0,0 +1,207 @@ +from __future__ import absolute_import, division, print_function, unicode_literals +from future.builtins import filter, str +from future import utils +import os +import sys +import ssl +import pprint +import socket +from future.backports.urllib import parse as urllib_parse +from future.backports.http.server import (HTTPServer as _HTTPServer, + SimpleHTTPRequestHandler, BaseHTTPRequestHandler) +from future.backports.test import support +threading = support.import_module("threading") + +here = os.path.dirname(__file__) + +HOST = support.HOST +CERTFILE = os.path.join(here, 'keycert.pem') + +# This one's based on HTTPServer, which is based on SocketServer + +class HTTPSServer(_HTTPServer): + + def __init__(self, server_address, handler_class, context): + _HTTPServer.__init__(self, server_address, handler_class) + self.context = context + + def __str__(self): + return ('<%s %s:%s>' % + (self.__class__.__name__, + self.server_name, + self.server_port)) + + def get_request(self): + # override this to wrap socket with SSL + try: + sock, addr = self.socket.accept() + sslconn = self.context.wrap_socket(sock, server_side=True) + 
except socket.error as e: + # socket errors are silenced by the caller, print them here + if support.verbose: + sys.stderr.write("Got an error:\n%s\n" % e) + raise + return sslconn, addr + +class RootedHTTPRequestHandler(SimpleHTTPRequestHandler): + # need to override translate_path to get a known root, + # instead of using os.curdir, since the test could be + # run from anywhere + + server_version = "TestHTTPS/1.0" + root = here + # Avoid hanging when a request gets interrupted by the client + timeout = 5 + + def translate_path(self, path): + """Translate a /-separated PATH to the local filename syntax. + + Components that mean special things to the local file system + (e.g. drive or directory names) are ignored. (XXX They should + probably be diagnosed.) + + """ + # abandon query parameters + path = urllib.parse.urlparse(path)[2] + path = os.path.normpath(urllib.parse.unquote(path)) + words = path.split('/') + words = filter(None, words) + path = self.root + for word in words: + drive, word = os.path.splitdrive(word) + head, word = os.path.split(word) + path = os.path.join(path, word) + return path + + def log_message(self, format, *args): + # we override this to suppress logging unless "verbose" + if support.verbose: + sys.stdout.write(" server (%s:%d %s):\n [%s] %s\n" % + (self.server.server_address, + self.server.server_port, + self.request.cipher(), + self.log_date_time_string(), + format%args)) + + +class StatsRequestHandler(BaseHTTPRequestHandler): + """Example HTTP request handler which returns SSL statistics on GET + requests. 
+ """ + + server_version = "StatsHTTPS/1.0" + + def do_GET(self, send_body=True): + """Serve a GET request.""" + sock = self.rfile.raw._sock + context = sock.context + stats = { + 'session_cache': context.session_stats(), + 'cipher': sock.cipher(), + 'compression': sock.compression(), + } + body = pprint.pformat(stats) + body = body.encode('utf-8') + self.send_response(200) + self.send_header("Content-type", "text/plain; charset=utf-8") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + if send_body: + self.wfile.write(body) + + def do_HEAD(self): + """Serve a HEAD request.""" + self.do_GET(send_body=False) + + def log_request(self, format, *args): + if support.verbose: + BaseHTTPRequestHandler.log_request(self, format, *args) + + +class HTTPSServerThread(threading.Thread): + + def __init__(self, context, host=HOST, handler_class=None): + self.flag = None + self.server = HTTPSServer((host, 0), + handler_class or RootedHTTPRequestHandler, + context) + self.port = self.server.server_port + threading.Thread.__init__(self) + self.daemon = True + + def __str__(self): + return "<%s %s>" % (self.__class__.__name__, self.server) + + def start(self, flag=None): + self.flag = flag + threading.Thread.start(self) + + def run(self): + if self.flag: + self.flag.set() + try: + self.server.serve_forever(0.05) + finally: + self.server.server_close() + + def stop(self): + self.server.shutdown() + + +def make_https_server(case, certfile=CERTFILE, host=HOST, handler_class=None): + # we assume the certfile contains both private key and certificate + context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) + context.load_cert_chain(certfile) + server = HTTPSServerThread(context, host, handler_class) + flag = threading.Event() + server.start(flag) + flag.wait() + def cleanup(): + if support.verbose: + sys.stdout.write('stopping HTTPS server\n') + server.stop() + if support.verbose: + sys.stdout.write('joining HTTPS thread\n') + server.join() + case.addCleanup(cleanup) + 
return server + + +if __name__ == "__main__": + import argparse + parser = argparse.ArgumentParser( + description='Run a test HTTPS server. ' + 'By default, the current directory is served.') + parser.add_argument('-p', '--port', type=int, default=4433, + help='port to listen on (default: %(default)s)') + parser.add_argument('-q', '--quiet', dest='verbose', default=True, + action='store_false', help='be less verbose') + parser.add_argument('-s', '--stats', dest='use_stats_handler', default=False, + action='store_true', help='always return stats page') + parser.add_argument('--curve-name', dest='curve_name', type=str, + action='store', + help='curve name for EC-based Diffie-Hellman') + parser.add_argument('--dh', dest='dh_file', type=str, action='store', + help='PEM file containing DH parameters') + args = parser.parse_args() + + support.verbose = args.verbose + if args.use_stats_handler: + handler_class = StatsRequestHandler + else: + handler_class = RootedHTTPRequestHandler + if utils.PY2: + handler_class.root = os.getcwdu() + else: + handler_class.root = os.getcwd() + context = ssl.SSLContext(ssl.PROTOCOL_TLSv1) + context.load_cert_chain(CERTFILE) + if args.curve_name: + context.set_ecdh_curve(args.curve_name) + if args.dh_file: + context.load_dh_params(args.dh_file) + + server = HTTPSServer(("", args.port), handler_class, context) + if args.verbose: + print("Listening on https://localhost:{0.port}".format(args)) + server.serve_forever(0.1) diff --git a/future/standard_library/test/support.py b/src/future/backports/test/support.py similarity index 91% rename from future/standard_library/test/support.py rename to src/future/backports/test/support.py index a62b1394..6639372b 100644 --- a/future/standard_library/test/support.py +++ b/src/future/backports/test/support.py @@ -6,13 +6,8 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) -import future.standard_library from future import utils -from future.builtins import * - - -# if 
__name__ != 'test.support': -# raise ImportError('support must be imported from the test package') +from future.builtins import str, range, open, int, map, list import contextlib import errno @@ -33,15 +28,22 @@ # import collections.abc # not present on Py2.7 import re import subprocess -import imp import time +try: + import sysconfig +except ImportError: + # sysconfig is not available on Python 2.6. Try using distutils.sysconfig instead: + from distutils import sysconfig import fnmatch import logging.handlers import struct import tempfile -import _testcapi + try: - import thread, threading + if utils.PY3: + import _thread, threading + else: + import thread as _thread, threading except ImportError: _thread = None threading = None @@ -55,6 +57,11 @@ except ImportError: zlib = None +try: + import gzip +except ImportError: + gzip = None + try: import bz2 except ImportError: @@ -84,7 +91,7 @@ "TestHandler", "Matcher", "can_symlink", "skip_unless_symlink", "skip_unless_xattr", "import_fresh_module", "requires_zlib", "PIPE_MAX_SIZE", "failfast", "anticipate_failure", "run_with_tz", - "requires_bz2", "requires_lzma", "suppress_crash_popup", + "requires_gzip", "requires_bz2", "requires_lzma", "suppress_crash_popup", ] class Error(Exception): @@ -132,7 +139,8 @@ def import_module(name, deprecated=False): def _save_and_remove_module(name, orig_modules): """Helper function to save and remove a module from sys.modules - Raise ImportError if the module can't be imported.""" + Raise ImportError if the module can't be imported. + """ # try to import the module and raise an error if it can't be imported if name not in sys.modules: __import__(name) @@ -145,7 +153,8 @@ def _save_and_remove_module(name, orig_modules): def _save_and_block_module(name, orig_modules): """Helper function to save and block a module in sys.modules - Return True if the module was in sys.modules, False otherwise.""" + Return True if the module was in sys.modules, False otherwise. 
+ """ saved = True try: orig_modules[name] = sys.modules[name] @@ -167,18 +176,32 @@ def anticipate_failure(condition): def import_fresh_module(name, fresh=(), blocked=(), deprecated=False): - """Imports and returns a module, deliberately bypassing the sys.modules cache - and importing a fresh copy of the module. Once the import is complete, - the sys.modules cache is restored to its original state. + """Import and return a module, deliberately bypassing sys.modules. + This function imports and returns a fresh copy of the named Python module + by removing the named module from sys.modules before doing the import. + Note that unlike reload, the original module is not affected by + this operation. + + *fresh* is an iterable of additional module names that are also removed + from the sys.modules cache before doing the import. + + *blocked* is an iterable of module names that are replaced with None + in the module cache during the import to ensure that attempts to import + them raise ImportError. - Modules named in fresh are also imported anew if needed by the import. - If one of these modules can't be imported, None is returned. + The named module and any modules named in the *fresh* and *blocked* + parameters are saved before starting the import and then reinserted into + sys.modules when the fresh import is complete. - Importing of modules named in blocked is prevented while the fresh import - takes place. + Module and package deprecation messages are suppressed during this import + if *deprecated* is True. + + This function will raise ImportError if the named module cannot be + imported. If deprecated is True, any module or package deprecation messages - will be suppressed.""" + will be suppressed. 
+ """ # NOTE: test_heapq, test_json and test_warnings include extra sanity checks # to make sure that this utility function is working as expected with _ignore_deprecated_imports(deprecated): @@ -240,7 +263,7 @@ def unload(name): if sys.platform.startswith("win"): def _waitfor(func, pathname, waitall=False): - # Peform the operation + # Perform the operation func(pathname) # Now setup the wait loop if waitall: @@ -256,7 +279,7 @@ def _waitfor(func, pathname, waitall=False): # required when contention occurs. timeout = 0.001 while timeout < 1.0: - # Note we are only testing for the existance of the file(s) in + # Note we are only testing for the existence of the file(s) in # the contents of the directory regardless of any security or # access rights. If we have made it this far, we have sufficient # permissions to do that much using Python's equivalent of the @@ -317,37 +340,6 @@ def rmtree(path): if error.errno != errno.ENOENT: raise -def make_legacy_pyc(source): - """Move a PEP 3147 pyc/pyo file to its legacy pyc/pyo location. - - The choice of .pyc or .pyo extension is done based on the __debug__ flag - value. - - :param source: The file system path to the source file. The source file - does not need to exist, however the PEP 3147 pyc file must exist. - :return: The file system path to the legacy pyc file. - """ - pyc_file = imp.cache_from_source(source) - up_one = os.path.dirname(os.path.abspath(source)) - legacy_pyc = os.path.join(up_one, source + ('c' if __debug__ else 'o')) - os.rename(pyc_file, legacy_pyc) - return legacy_pyc - -def forget(modname): - """'Forget' a module was ever imported. - - This removes the module from sys.modules and deletes any PEP 3147 or - legacy .pyc and .pyo files. - """ - unload(modname) - for dirname in sys.path: - source = os.path.join(dirname, modname + '.py') - # It doesn't matter if they exist or not, unlink all possible - # combinations of PEP 3147 and legacy pyc and pyo files. 
- unlink(source + 'c') - unlink(source + 'o') - unlink(imp.cache_from_source(source, debug_override=True)) - unlink(imp.cache_from_source(source, debug_override=False)) # On some platforms, should not run gui test even if it is allowed # in `use_resources'. @@ -424,6 +416,8 @@ def wrapper(*args, **kw): raise unittest.SkipTest( "%s version %s or higher required, not %s" % (sysname, min_version_txt, version_txt)) + return func(*args, **kw) + wrapper.min_version = min_version return wrapper return decorator @@ -472,8 +466,11 @@ def wrapper(*args, **kw): return wrapper return decorator +# Don't use "localhost", since resolving it uses the DNS under recent +# Windows versions (see issue #18792). +HOST = "127.0.0.1" +HOSTv6 = "::1" -HOST = 'localhost' def find_unused_port(family=socket.AF_INET, socktype=socket.SOCK_STREAM): """Returns an unused port that should be suitable for binding. This is @@ -558,9 +555,15 @@ def bind_port(sock, host=HOST): raise TestFailed("tests should never set the SO_REUSEADDR " \ "socket option on TCP/IP sockets!") if hasattr(socket, 'SO_REUSEPORT'): - if sock.getsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT) == 1: - raise TestFailed("tests should never set the SO_REUSEPORT " \ - "socket option on TCP/IP sockets!") + try: + if sock.getsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT) == 1: + raise TestFailed("tests should never set the SO_REUSEPORT " \ + "socket option on TCP/IP sockets!") + except socket.error: + # Python's socket module was compiled using modern headers + # thus defining SO_REUSEPORT but this process is running + # under an older kernel that does not support SO_REUSEPORT. + pass if hasattr(socket, 'SO_EXCLUSIVEADDRUSE'): sock.setsockopt(socket.SOL_SOCKET, socket.SO_EXCLUSIVEADDRUSE, 1) @@ -586,11 +589,19 @@ def _is_ipv6_enabled(): IPV6_ENABLED = _is_ipv6_enabled() -# A constant likely larger than the underlying OS pipe buffer size. 
-# Windows limit seems to be around 512B, and many Unix kernels have a 64K pipe -# buffer size or 16*PAGE_SIZE: take a few megs to be sure. This -PIPE_MAX_SIZE = 3 * 1000 * 1000 +# A constant likely larger than the underlying OS pipe buffer size, to +# make writes blocking. +# Windows limit seems to be around 512 B, and many Unix kernels have a +# 64 KiB pipe buffer size or 16 * PAGE_SIZE: take a few megs to be sure. +# (see issue #17835 for a discussion of this number). +PIPE_MAX_SIZE = 4 * 1024 * 1024 + 1 +# A constant likely larger than the underlying OS socket buffer size, to make +# writes blocking. +# The socket buffer sizes can usually be tuned system-wide (e.g. through sysctl +# on Linux), or on a per-socket basis (SO_SNDBUF/SO_RCVBUF). See issue #18643 +# for a discussion of this number). +SOCK_MAX_SIZE = 16 * 1024 * 1024 + 1 # # decorator for skipping tests on non-IEEE 754 platforms # requires_IEEE_754 = unittest.skipUnless( @@ -623,7 +634,7 @@ def _is_ipv6_enabled(): # # First try printable and common characters to have a readable filename. # # For each character, the encoding list are just example of encodings able # # to encode the character (the list is not exhaustive). -# +# # # U+00E6 (Latin Small Letter Ae): cp1252, iso-8859-1 # '\u00E6', # # U+0130 (Latin Capital Letter I With Dot Above): cp1254, iso8859_3 @@ -642,11 +653,11 @@ def _is_ipv6_enabled(): # '\u062A', # # U+0E01 (Thai Character Ko Kai): cp874 # '\u0E01', -# +# # # Then try more "special" characters. "special" because they may be # # interpreted or displayed differently depending on the exact locale # # encoding and the font. 
-# +# # # U+00A0 (No-Break Space) # '\u00A0', # # U+20AC (Euro Sign) @@ -659,7 +670,7 @@ def _is_ipv6_enabled(): # else: # FS_NONASCII = character # break -# +# # # TESTFN_UNICODE is a non-ascii filename # TESTFN_UNICODE = TESTFN + "-\xe0\xf2\u0258\u0141\u011f" # if sys.platform == 'darwin': @@ -669,7 +680,7 @@ def _is_ipv6_enabled(): # import unicodedata # TESTFN_UNICODE = unicodedata.normalize('NFD', TESTFN_UNICODE) # TESTFN_ENCODING = sys.getfilesystemencoding() -# +# # # TESTFN_UNENCODABLE is a filename (str type) that should *not* be able to be # # encoded by the filesystem encoding (in strict mode). It can be None if we # # cannot generate such filename. @@ -702,7 +713,7 @@ def _is_ipv6_enabled(): # # File system encoding (eg. ISO-8859-* encodings) can encode # # the byte 0xff. Skip some unicode filename tests. # pass -# +# # # TESTFN_UNDECODABLE is a filename (bytes type) that should *not* be able to be # # decoded from the filesystem encoding (in strict mode). It can be None if we # # cannot generate such filename (ex: the latin1 encoding can decode any byte @@ -732,7 +743,7 @@ def _is_ipv6_enabled(): # except UnicodeDecodeError: # TESTFN_UNDECODABLE = os.fsencode(TESTFN) + name # break -# +# # if FS_NONASCII: # TESTFN_NONASCII = TESTFN + '-' + FS_NONASCII # else: @@ -836,11 +847,12 @@ def check_syntax_error(testcase, statement): '', 'exec') def open_urlresource(url, *args, **kw): - import urllib.request, urllib.parse + from future.backports.urllib import (request as urllib_request, + parse as urllib_parse) check = kw.pop('check', None) - filename = urllib.parse.urlparse(url)[2].split('/')[-1] # '/': it's URL! + filename = urllib_parse.urlparse(url)[2].split('/')[-1] # '/': it's URL! fn = os.path.join(os.path.dirname(__file__), "data", filename) @@ -863,7 +875,7 @@ def check_valid_file(fn): requires('urlfetch') print('\tfetching %s ...' 
% url, file=get_original_stdout()) - f = urllib.request.urlopen(url, timeout=15) + f = urllib_request.urlopen(url, timeout=15) try: with open(fn, "wb") as out: s = f.read() @@ -912,7 +924,12 @@ def _filterwarnings(filters, quiet=False): frame = sys._getframe(2) registry = frame.f_globals.get('__warningregistry__') if registry: - registry.clear() + if utils.PY3: + registry.clear() + else: + # Py2-compatible: + for i in range(len(registry)): + registry.pop() with warnings.catch_warnings(record=True) as w: # Set filter "always" to record all warnings. Because # test_warnings swap the module, we need to look up in @@ -1116,69 +1133,73 @@ def __exit__(self, type_=None, value=None, traceback=None): ioerror_peer_reset = TransientResource(IOError, errno=errno.ECONNRESET) -# @contextlib.contextmanager -# def transient_internet(resource_name, timeout=30.0, errnos=()): -# """Return a context manager that raises ResourceDenied when various issues -# with the Internet connection manifest themselves as exceptions.""" -# default_errnos = [ -# ('ECONNREFUSED', 111), -# ('ECONNRESET', 104), -# ('EHOSTUNREACH', 113), -# ('ENETUNREACH', 101), -# ('ETIMEDOUT', 110), -# ] -# default_gai_errnos = [ -# ('EAI_AGAIN', -3), -# ('EAI_FAIL', -4), -# ('EAI_NONAME', -2), -# ('EAI_NODATA', -5), -# # Encountered when trying to resolve IPv6-only hostnames -# ('WSANO_DATA', 11004), -# ] -# -# denied = ResourceDenied("Resource %r is not available" % resource_name) -# captured_errnos = errnos -# gai_errnos = [] -# if not captured_errnos: -# captured_errnos = [getattr(errno, name, num) -# for (name, num) in default_errnos] -# gai_errnos = [getattr(socket, name, num) -# for (name, num) in default_gai_errnos] -# -# def filter_error(err): -# n = getattr(err, 'errno', None) -# if (isinstance(err, socket.timeout) or -# (isinstance(err, socket.gaierror) and n in gai_errnos) or -# n in captured_errnos): -# if not verbose: -# sys.stderr.write(denied.args[0] + "\n") -# raise denied from err -# -# old_timeout = 
socket.getdefaulttimeout() -# try: -# if timeout is not None: -# socket.setdefaulttimeout(timeout) -# yield -# except IOError as err: -# # urllib can wrap original socket errors multiple times (!), we must -# # unwrap to get at the original error. -# while True: -# a = err.args -# if len(a) >= 1 and isinstance(a[0], IOError): -# err = a[0] -# # The error can also be wrapped as args[1]: -# # except socket.error as msg: -# # raise IOError('socket error', msg).with_traceback(sys.exc_info()[2]) -# elif len(a) >= 2 and isinstance(a[1], IOError): -# err = a[1] -# else: -# break -# filter_error(err) -# raise -# # XXX should we catch generic exceptions and look for their -# # __cause__ or __context__? -# finally: -# socket.setdefaulttimeout(old_timeout) +@contextlib.contextmanager +def transient_internet(resource_name, timeout=30.0, errnos=()): + """Return a context manager that raises ResourceDenied when various issues + with the Internet connection manifest themselves as exceptions.""" + default_errnos = [ + ('ECONNREFUSED', 111), + ('ECONNRESET', 104), + ('EHOSTUNREACH', 113), + ('ENETUNREACH', 101), + ('ETIMEDOUT', 110), + ] + default_gai_errnos = [ + ('EAI_AGAIN', -3), + ('EAI_FAIL', -4), + ('EAI_NONAME', -2), + ('EAI_NODATA', -5), + # Encountered when trying to resolve IPv6-only hostnames + ('WSANO_DATA', 11004), + ] + + denied = ResourceDenied("Resource %r is not available" % resource_name) + captured_errnos = errnos + gai_errnos = [] + if not captured_errnos: + captured_errnos = [getattr(errno, name, num) + for (name, num) in default_errnos] + gai_errnos = [getattr(socket, name, num) + for (name, num) in default_gai_errnos] + + def filter_error(err): + n = getattr(err, 'errno', None) + if (isinstance(err, socket.timeout) or + (isinstance(err, socket.gaierror) and n in gai_errnos) or + n in captured_errnos): + if not verbose: + sys.stderr.write(denied.args[0] + "\n") + # Was: raise denied from err + # For Python-Future: + exc = denied + exc.__cause__ = err + raise 
exc + + old_timeout = socket.getdefaulttimeout() + try: + if timeout is not None: + socket.setdefaulttimeout(timeout) + yield + except IOError as err: + # urllib can wrap original socket errors multiple times (!), we must + # unwrap to get at the original error. + while True: + a = err.args + if len(a) >= 1 and isinstance(a[0], IOError): + err = a[0] + # The error can also be wrapped as args[1]: + # except socket.error as msg: + # raise IOError('socket error', msg).with_traceback(sys.exc_info()[2]) + elif len(a) >= 2 and isinstance(a[1], IOError): + err = a[1] + else: + break + filter_error(err) + raise + # XXX should we catch generic exceptions and look for their + # __cause__ or __context__? + finally: + socket.setdefaulttimeout(old_timeout) @contextlib.contextmanager @@ -1614,15 +1635,15 @@ def case_pred(test): # We don't have sysconfig on Py2.6: # #======================================================================= # # Check for the presence of docstrings. -# +# # HAVE_DOCSTRINGS = (check_impl_detail(cpython=False) or # sys.platform == 'win32' or # sysconfig.get_config_var('WITH_DOC_STRINGS')) -# +# # requires_docstrings = unittest.skipUnless(HAVE_DOCSTRINGS, # "test requires docstrings") -# -# +# +# # #======================================================================= # doctest driver. @@ -1662,7 +1683,12 @@ def modules_cleanup(oldmodules): # globals will be set to None which will trip up the cached functions. encodings = [(k, v) for k, v in sys.modules.items() if k.startswith('encodings.')] - sys.modules.clear() + # Was: + # sys.modules.clear() + # Py2-compatible: + for i in range(len(sys.modules)): + sys.modules.pop() + sys.modules.update(encodings) # XXX: This kind of problem can affect more than just encodings. In particular # extension modules (such as _ssl) don't cope with reloading properly. 
@@ -1673,7 +1699,7 @@ def modules_cleanup(oldmodules): sys.modules.update(oldmodules) #======================================================================= -# Py2.7 versions of threading_setup() and threading_cleanup() which don't refer +# Backported versions of threading_setup() and threading_cleanup() which don't refer # to threading._dangling (not available on Py2.7). # Threading support to prevent reporting refleaks when running regrtest.py -R @@ -1687,25 +1713,23 @@ def modules_cleanup(oldmodules): # at the end of a test run. def threading_setup(): - if thread: - return thread._count(), + if _thread: + return _thread._count(), else: return 1, def threading_cleanup(nb_threads): - if not thread: + if not _thread: return _MAX_COUNT = 10 for count in range(_MAX_COUNT): - n = thread._count() + n = _thread._count() if n == nb_threads: break time.sleep(0.1) # XXX print a warning in case of failure? - -#======================================================================= def reap_threads(func): """Use this function when threads are being used. This will ensure that the threads are cleaned up even when the test fails. diff --git a/future/standard_library/total_ordering.py b/src/future/backports/total_ordering.py similarity index 100% rename from future/standard_library/total_ordering.py rename to src/future/backports/total_ordering.py diff --git a/src/future/backports/urllib/__init__.py b/src/future/backports/urllib/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/future/backports/urllib/error.py b/src/future/backports/urllib/error.py new file mode 100644 index 00000000..a473e445 --- /dev/null +++ b/src/future/backports/urllib/error.py @@ -0,0 +1,75 @@ +"""Exception classes raised by urllib. + +The base exception class is URLError, which inherits from IOError. It +doesn't define any behavior of its own, but is the base class for all +exceptions defined in this package. 
+
+HTTPError is an exception class that is also a valid HTTP response
+instance. It behaves this way because HTTP protocol errors are valid
+responses, with a status code, headers, and a body. In some contexts,
+an application may want to handle an exception like a regular
+response.
+"""
+from __future__ import absolute_import, division, unicode_literals
+from future import standard_library
+
+from future.backports.urllib import response as urllib_response
+
+
+__all__ = ['URLError', 'HTTPError', 'ContentTooShortError']
+
+
+# do these error classes make sense?
+# make sure all of the IOError stuff is overridden. we just want to be
+# subtypes.
+
+class URLError(IOError):
+    # URLError is a sub-type of IOError, but it doesn't share any of
+    # the implementation. need to override __init__ and __str__.
+    # It sets self.args for compatibility with other EnvironmentError
+    # subclasses, but args doesn't have the typical format with errno in
+    # slot 0 and strerror in slot 1. This may be better than nothing.
+    def __init__(self, reason, filename=None):
+        self.args = reason,
+        self.reason = reason
+        if filename is not None:
+            self.filename = filename
+
+    def __str__(self):
+        return '<urlopen error %s>' % self.reason
+
+class HTTPError(URLError, urllib_response.addinfourl):
+    """Raised when HTTP error occurs, but also acts like non-error return"""
+    __super_init = urllib_response.addinfourl.__init__
+
+    def __init__(self, url, code, msg, hdrs, fp):
+        self.code = code
+        self.msg = msg
+        self.hdrs = hdrs
+        self.fp = fp
+        self.filename = url
+        # The addinfourl classes depend on fp being a valid file
+        # object. In some cases, the HTTPError may not have a valid
+        # file object. If this happens, the simplest workaround is to
+        # not initialize the base classes.
+ if fp is not None: + self.__super_init(fp, hdrs, url, code) + + def __str__(self): + return 'HTTP Error %s: %s' % (self.code, self.msg) + + # since URLError specifies a .reason attribute, HTTPError should also + # provide this attribute. See issue13211 for discussion. + @property + def reason(self): + return self.msg + + def info(self): + return self.hdrs + + +# exception raised when downloaded size does not match content-length +class ContentTooShortError(URLError): + def __init__(self, message, content): + URLError.__init__(self, message) + self.content = content diff --git a/src/future/backports/urllib/parse.py b/src/future/backports/urllib/parse.py new file mode 100644 index 00000000..04e52d49 --- /dev/null +++ b/src/future/backports/urllib/parse.py @@ -0,0 +1,991 @@ +""" +Ported using Python-Future from the Python 3.3 standard library. + +Parse (absolute and relative) URLs. + +urlparse module is based upon the following RFC specifications. + +RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding +and L. Masinter, January 2005. + +RFC 2732 : "Format for Literal IPv6 Addresses in URL's by R.Hinden, B.Carpenter +and L.Masinter, December 1999. + +RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T. +Berners-Lee, R. Fielding, and L. Masinter, August 1998. + +RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998. + +RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June +1995. + +RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M. +McCahill, December 1994 + +RFC 3986 is considered the current standard and any future changes to +urlparse module should conform with it. The urlparse module is +currently not entirely compliant with this RFC due to defacto +scenarios for parsing, and for backward compatibility purposes, some +parsing quirks from older RFCs are retained. 
The testcases in +test_urlparse.py provides a good indicator of parsing behavior. +""" +from __future__ import absolute_import, division, unicode_literals +from future.builtins import bytes, chr, dict, int, range, str +from future.utils import raise_with_traceback + +import re +import sys +import collections + +__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", + "urlsplit", "urlunsplit", "urlencode", "parse_qs", + "parse_qsl", "quote", "quote_plus", "quote_from_bytes", + "unquote", "unquote_plus", "unquote_to_bytes"] + +# A classification of schemes ('' means apply by default) +uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap', + 'wais', 'file', 'https', 'shttp', 'mms', + 'prospero', 'rtsp', 'rtspu', '', 'sftp', + 'svn', 'svn+ssh'] +uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', + 'imap', 'wais', 'file', 'mms', 'https', 'shttp', + 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', + 'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh'] +uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', + 'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips', + 'mms', '', 'sftp', 'tel'] + +# These are not actually used anymore, but should stay for backwards +# compatibility. (They are undocumented, but have a public-looking name.) 
+non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', + 'telnet', 'wais', 'imap', 'snews', 'sip', 'sips'] +uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms', + 'gopher', 'rtsp', 'rtspu', 'sip', 'sips', ''] +uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', + 'nntp', 'wais', 'https', 'shttp', 'snews', + 'file', 'prospero', ''] + +# Characters valid in scheme names +scheme_chars = ('abcdefghijklmnopqrstuvwxyz' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + '0123456789' + '+-.') + +# XXX: Consider replacing with functools.lru_cache +MAX_CACHE_SIZE = 20 +_parse_cache = {} + +def clear_cache(): + """Clear the parse cache and the quoters cache.""" + _parse_cache.clear() + _safe_quoters.clear() + + +# Helpers for bytes handling +# For 3.2, we deliberately require applications that +# handle improperly quoted URLs to do their own +# decoding and encoding. If valid use cases are +# presented, we may relax this by using latin-1 +# decoding internally for 3.3 +_implicit_encoding = 'ascii' +_implicit_errors = 'strict' + +def _noop(obj): + return obj + +def _encode_result(obj, encoding=_implicit_encoding, + errors=_implicit_errors): + return obj.encode(encoding, errors) + +def _decode_args(args, encoding=_implicit_encoding, + errors=_implicit_errors): + return tuple(x.decode(encoding, errors) if x else '' for x in args) + +def _coerce_args(*args): + # Invokes decode if necessary to create str args + # and returns the coerced inputs along with + # an appropriate result coercion function + # - noop for str inputs + # - encoding function otherwise + str_input = isinstance(args[0], str) + for arg in args[1:]: + # We special-case the empty string to support the + # "scheme=''" default argument to some functions + if arg and isinstance(arg, str) != str_input: + raise TypeError("Cannot mix str and non-str arguments") + if str_input: + return args + (_noop,) + return _decode_args(args) + (_encode_result,) + +# Result objects are more helpful than simple tuples +class 
_ResultMixinStr(object): + """Standard approach to encoding parsed results from str to bytes""" + __slots__ = () + + def encode(self, encoding='ascii', errors='strict'): + return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self)) + + +class _ResultMixinBytes(object): + """Standard approach to decoding parsed results from bytes to str""" + __slots__ = () + + def decode(self, encoding='ascii', errors='strict'): + return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self)) + + +class _NetlocResultMixinBase(object): + """Shared methods for the parsed result objects containing a netloc element""" + __slots__ = () + + @property + def username(self): + return self._userinfo[0] + + @property + def password(self): + return self._userinfo[1] + + @property + def hostname(self): + hostname = self._hostinfo[0] + if not hostname: + hostname = None + elif hostname is not None: + hostname = hostname.lower() + return hostname + + @property + def port(self): + port = self._hostinfo[1] + if port is not None: + port = int(port, 10) + # Return None on an illegal port + if not ( 0 <= port <= 65535): + return None + return port + + +class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr): + __slots__ = () + + @property + def _userinfo(self): + netloc = self.netloc + userinfo, have_info, hostinfo = netloc.rpartition('@') + if have_info: + username, have_password, password = userinfo.partition(':') + if not have_password: + password = None + else: + username = password = None + return username, password + + @property + def _hostinfo(self): + netloc = self.netloc + _, _, hostinfo = netloc.rpartition('@') + _, have_open_br, bracketed = hostinfo.partition('[') + if have_open_br: + hostname, _, port = bracketed.partition(']') + _, have_port, port = port.partition(':') + else: + hostname, have_port, port = hostinfo.partition(':') + if not have_port: + port = None + return hostname, port + + +class _NetlocResultMixinBytes(_NetlocResultMixinBase, 
_ResultMixinBytes): + __slots__ = () + + @property + def _userinfo(self): + netloc = self.netloc + userinfo, have_info, hostinfo = netloc.rpartition(b'@') + if have_info: + username, have_password, password = userinfo.partition(b':') + if not have_password: + password = None + else: + username = password = None + return username, password + + @property + def _hostinfo(self): + netloc = self.netloc + _, _, hostinfo = netloc.rpartition(b'@') + _, have_open_br, bracketed = hostinfo.partition(b'[') + if have_open_br: + hostname, _, port = bracketed.partition(b']') + _, have_port, port = port.partition(b':') + else: + hostname, have_port, port = hostinfo.partition(b':') + if not have_port: + port = None + return hostname, port + + +from collections import namedtuple + +_DefragResultBase = namedtuple('DefragResult', 'url fragment') +_SplitResultBase = namedtuple('SplitResult', 'scheme netloc path query fragment') +_ParseResultBase = namedtuple('ParseResult', 'scheme netloc path params query fragment') + +# For backwards compatibility, alias _NetlocResultMixinStr +# ResultBase is no longer part of the documented API, but it is +# retained since deprecating it isn't worth the hassle +ResultBase = _NetlocResultMixinStr + +# Structured result objects for string data +class DefragResult(_DefragResultBase, _ResultMixinStr): + __slots__ = () + def geturl(self): + if self.fragment: + return self.url + '#' + self.fragment + else: + return self.url + +class SplitResult(_SplitResultBase, _NetlocResultMixinStr): + __slots__ = () + def geturl(self): + return urlunsplit(self) + +class ParseResult(_ParseResultBase, _NetlocResultMixinStr): + __slots__ = () + def geturl(self): + return urlunparse(self) + +# Structured result objects for bytes data +class DefragResultBytes(_DefragResultBase, _ResultMixinBytes): + __slots__ = () + def geturl(self): + if self.fragment: + return self.url + b'#' + self.fragment + else: + return self.url + +class SplitResultBytes(_SplitResultBase, 
_NetlocResultMixinBytes):
+    __slots__ = ()
+    def geturl(self):
+        return urlunsplit(self)
+
+class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes):
+    __slots__ = ()
+    def geturl(self):
+        return urlunparse(self)
+
+# Set up the encode/decode result pairs
+def _fix_result_transcoding():
+    _result_pairs = (
+        (DefragResult, DefragResultBytes),
+        (SplitResult, SplitResultBytes),
+        (ParseResult, ParseResultBytes),
+    )
+    for _decoded, _encoded in _result_pairs:
+        _decoded._encoded_counterpart = _encoded
+        _encoded._decoded_counterpart = _decoded
+
+_fix_result_transcoding()
+del _fix_result_transcoding
+
+def urlparse(url, scheme='', allow_fragments=True):
+    """Parse a URL into 6 components:
+    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
+    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
+    Note that we don't break the components up in smaller bits
+    (e.g. netloc is a single string) and we don't expand % escapes."""
+    url, scheme, _coerce_result = _coerce_args(url, scheme)
+    splitresult = urlsplit(url, scheme, allow_fragments)
+    scheme, netloc, url, query, fragment = splitresult
+    if scheme in uses_params and ';' in url:
+        url, params = _splitparams(url)
+    else:
+        params = ''
+    result = ParseResult(scheme, netloc, url, params, query, fragment)
+    return _coerce_result(result)
+
+def _splitparams(url):
+    if '/' in url:
+        i = url.find(';', url.rfind('/'))
+        if i < 0:
+            return url, ''
+    else:
+        i = url.find(';')
+    return url[:i], url[i+1:]
+
+def _splitnetloc(url, start=0):
+    delim = len(url)   # position of end of domain part of url, default is end
+    for c in '/?#':    # look for delimiters; the order is NOT important
+        wdelim = url.find(c, start)        # find first of this delim
+        if wdelim >= 0:                    # if found
+            delim = min(delim, wdelim)     # use earliest delim position
+    return url[start:delim], url[delim:]   # return (domain, rest)
+
+def urlsplit(url, scheme='', allow_fragments=True):
+    """Parse a URL into 5 components:
+    <scheme>://<netloc>/<path>?<query>#<fragment>
+    Return a 5-tuple: (scheme, netloc, path, query, fragment).
+ Note that we don't break the components up in smaller bits + (e.g. netloc is a single string) and we don't expand % escapes.""" + url, scheme, _coerce_result = _coerce_args(url, scheme) + allow_fragments = bool(allow_fragments) + key = url, scheme, allow_fragments, type(url), type(scheme) + cached = _parse_cache.get(key, None) + if cached: + return _coerce_result(cached) + if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth + clear_cache() + netloc = query = fragment = '' + i = url.find(':') + if i > 0: + if url[:i] == 'http': # optimize the common case + scheme = url[:i].lower() + url = url[i+1:] + if url[:2] == '//': + netloc, url = _splitnetloc(url, 2) + if (('[' in netloc and ']' not in netloc) or + (']' in netloc and '[' not in netloc)): + raise ValueError("Invalid IPv6 URL") + if allow_fragments and '#' in url: + url, fragment = url.split('#', 1) + if '?' in url: + url, query = url.split('?', 1) + v = SplitResult(scheme, netloc, url, query, fragment) + _parse_cache[key] = v + return _coerce_result(v) + for c in url[:i]: + if c not in scheme_chars: + break + else: + # make sure "url" is not actually a port number (in which case + # "scheme" is really part of the path) + rest = url[i+1:] + if not rest or any(c not in '0123456789' for c in rest): + # not a port number + scheme, url = url[:i].lower(), rest + + if url[:2] == '//': + netloc, url = _splitnetloc(url, 2) + if (('[' in netloc and ']' not in netloc) or + (']' in netloc and '[' not in netloc)): + raise ValueError("Invalid IPv6 URL") + if allow_fragments and '#' in url: + url, fragment = url.split('#', 1) + if '?' in url: + url, query = url.split('?', 1) + v = SplitResult(scheme, netloc, url, query, fragment) + _parse_cache[key] = v + return _coerce_result(v) + +def urlunparse(components): + """Put a parsed URL back together again. This may result in a + slightly different, but equivalent URL, if the URL that was parsed + originally had redundant delimiters, e.g. a ? 
with an empty query + (the draft states that these are equivalent).""" + scheme, netloc, url, params, query, fragment, _coerce_result = ( + _coerce_args(*components)) + if params: + url = "%s;%s" % (url, params) + return _coerce_result(urlunsplit((scheme, netloc, url, query, fragment))) + +def urlunsplit(components): + """Combine the elements of a tuple as returned by urlsplit() into a + complete URL as a string. The data argument can be any five-item iterable. + This may result in a slightly different, but equivalent URL, if the URL that + was parsed originally had unnecessary delimiters (for example, a ? with an + empty query; the RFC states that these are equivalent).""" + scheme, netloc, url, query, fragment, _coerce_result = ( + _coerce_args(*components)) + if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'): + if url and url[:1] != '/': url = '/' + url + url = '//' + (netloc or '') + url + if scheme: + url = scheme + ':' + url + if query: + url = url + '?' + query + if fragment: + url = url + '#' + fragment + return _coerce_result(url) + +def urljoin(base, url, allow_fragments=True): + """Join a base URL and a possibly relative URL to form an absolute + interpretation of the latter.""" + if not base: + return url + if not url: + return base + base, url, _coerce_result = _coerce_args(base, url) + bscheme, bnetloc, bpath, bparams, bquery, bfragment = \ + urlparse(base, '', allow_fragments) + scheme, netloc, path, params, query, fragment = \ + urlparse(url, bscheme, allow_fragments) + if scheme != bscheme or scheme not in uses_relative: + return _coerce_result(url) + if scheme in uses_netloc: + if netloc: + return _coerce_result(urlunparse((scheme, netloc, path, + params, query, fragment))) + netloc = bnetloc + if path[:1] == '/': + return _coerce_result(urlunparse((scheme, netloc, path, + params, query, fragment))) + if not path and not params: + path = bpath + params = bparams + if not query: + query = bquery + return 
_coerce_result(urlunparse((scheme, netloc, path, + params, query, fragment))) + segments = bpath.split('/')[:-1] + path.split('/') + # XXX The stuff below is bogus in various ways... + if segments[-1] == '.': + segments[-1] = '' + while '.' in segments: + segments.remove('.') + while 1: + i = 1 + n = len(segments) - 1 + while i < n: + if (segments[i] == '..' + and segments[i-1] not in ('', '..')): + del segments[i-1:i+1] + break + i = i+1 + else: + break + if segments == ['', '..']: + segments[-1] = '' + elif len(segments) >= 2 and segments[-1] == '..': + segments[-2:] = [''] + return _coerce_result(urlunparse((scheme, netloc, '/'.join(segments), + params, query, fragment))) + +def urldefrag(url): + """Removes any existing fragment from URL. + + Returns a tuple of the defragmented URL and the fragment. If + the URL contained no fragments, the second element is the + empty string. + """ + url, _coerce_result = _coerce_args(url) + if '#' in url: + s, n, p, a, q, frag = urlparse(url) + defrag = urlunparse((s, n, p, a, q, '')) + else: + frag = '' + defrag = url + return _coerce_result(DefragResult(defrag, frag)) + +_hexdig = '0123456789ABCDEFabcdef' +_hextobyte = dict(((a + b).encode(), bytes([int(a + b, 16)])) + for a in _hexdig for b in _hexdig) + +def unquote_to_bytes(string): + """unquote_to_bytes('abc%20def') -> b'abc def'.""" + # Note: strings are encoded as UTF-8. This is only an issue if it contains + # unescaped non-ASCII characters, which URIs should not. + if not string: + # Is it a string-like object? 
+ string.split + return bytes(b'') + if isinstance(string, str): + string = string.encode('utf-8') + ### For Python-Future: + # It is already a byte-string object, but force it to be newbytes here on + # Py2: + string = bytes(string) + ### + bits = string.split(b'%') + if len(bits) == 1: + return string + res = [bits[0]] + append = res.append + for item in bits[1:]: + try: + append(_hextobyte[item[:2]]) + append(item[2:]) + except KeyError: + append(b'%') + append(item) + return bytes(b'').join(res) + +_asciire = re.compile('([\x00-\x7f]+)') + +def unquote(string, encoding='utf-8', errors='replace'): + """Replace %xx escapes by their single-character equivalent. The optional + encoding and errors parameters specify how to decode percent-encoded + sequences into Unicode characters, as accepted by the bytes.decode() + method. + By default, percent-encoded sequences are decoded with UTF-8, and invalid + sequences are replaced by a placeholder character. + + unquote('abc%20def') -> 'abc def'. + """ + if '%' not in string: + string.split + return string + if encoding is None: + encoding = 'utf-8' + if errors is None: + errors = 'replace' + bits = _asciire.split(string) + res = [bits[0]] + append = res.append + for i in range(1, len(bits), 2): + append(unquote_to_bytes(bits[i]).decode(encoding, errors)) + append(bits[i + 1]) + return ''.join(res) + +def parse_qs(qs, keep_blank_values=False, strict_parsing=False, + encoding='utf-8', errors='replace'): + """Parse a query given as a string argument. + + Arguments: + + qs: percent-encoded query string to be parsed + + keep_blank_values: flag indicating whether blank values in + percent-encoded queries should be treated as blank strings. + A true value indicates that blanks should be retained as + blank strings. The default false value indicates that + blank values are to be ignored and treated as if they were + not included. + + strict_parsing: flag indicating what to do with parsing errors. 
+ If false (the default), errors are silently ignored. + If true, errors raise a ValueError exception. + + encoding and errors: specify how to decode percent-encoded sequences + into Unicode characters, as accepted by the bytes.decode() method. + """ + parsed_result = {} + pairs = parse_qsl(qs, keep_blank_values, strict_parsing, + encoding=encoding, errors=errors) + for name, value in pairs: + if name in parsed_result: + parsed_result[name].append(value) + else: + parsed_result[name] = [value] + return parsed_result + +def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, + encoding='utf-8', errors='replace'): + """Parse a query given as a string argument. + + Arguments: + + qs: percent-encoded query string to be parsed + + keep_blank_values: flag indicating whether blank values in + percent-encoded queries should be treated as blank strings. A + true value indicates that blanks should be retained as blank + strings. The default false value indicates that blank values + are to be ignored and treated as if they were not included. + + strict_parsing: flag indicating what to do with parsing errors. If + false (the default), errors are silently ignored. If true, + errors raise a ValueError exception. + + encoding and errors: specify how to decode percent-encoded sequences + into Unicode characters, as accepted by the bytes.decode() method. + + Returns a list, as G-d intended. 
+ """ + qs, _coerce_result = _coerce_args(qs) + pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] + r = [] + for name_value in pairs: + if not name_value and not strict_parsing: + continue + nv = name_value.split('=', 1) + if len(nv) != 2: + if strict_parsing: + raise ValueError("bad query field: %r" % (name_value,)) + # Handle case of a control-name with no equal sign + if keep_blank_values: + nv.append('') + else: + continue + if len(nv[1]) or keep_blank_values: + name = nv[0].replace('+', ' ') + name = unquote(name, encoding=encoding, errors=errors) + name = _coerce_result(name) + value = nv[1].replace('+', ' ') + value = unquote(value, encoding=encoding, errors=errors) + value = _coerce_result(value) + r.append((name, value)) + return r + +def unquote_plus(string, encoding='utf-8', errors='replace'): + """Like unquote(), but also replace plus signs by spaces, as required for + unquoting HTML form values. + + unquote_plus('%7e/abc+def') -> '~/abc def' + """ + string = string.replace('+', ' ') + return unquote(string, encoding, errors) + +_ALWAYS_SAFE = frozenset(bytes(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + b'abcdefghijklmnopqrstuvwxyz' + b'0123456789' + b'_.-')) +_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE) +_safe_quoters = {} + +class Quoter(collections.defaultdict): + """A mapping from bytes (in range(0,256)) to strings. + + String values are percent-encoded byte values, unless the key < 128, and + in the "safe" set (either the specified safe set, or default set). + """ + # Keeps a cache internally, using defaultdict, for efficiency (lookups + # of cached keys don't call Python code at all). + def __init__(self, safe): + """safe: bytes object.""" + self.safe = _ALWAYS_SAFE.union(bytes(safe)) + + def __repr__(self): + # Without this, will just display as a defaultdict + return "" % dict(self) + + def __missing__(self, b): + # Handle a cache miss. Store quoted string in cache and return. 
+ res = chr(b) if b in self.safe else '%{0:02X}'.format(b) + self[b] = res + return res + +def quote(string, safe='/', encoding=None, errors=None): + """quote('abc def') -> 'abc%20def' + + Each part of a URL, e.g. the path info, the query, etc., has a + different set of reserved characters that must be quoted. + + RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists + the following reserved characters. + + reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | + "$" | "," + + Each of these characters is reserved in some component of a URL, + but not necessarily in all of them. + + By default, the quote function is intended for quoting the path + section of a URL. Thus, it will not encode '/'. This character + is reserved, but in typical usage the quote function is being + called on a path where the existing slash characters are used as + reserved characters. + + string and safe may be either str or bytes objects. encoding must + not be specified if string is a str. + + The optional encoding and errors parameters specify how to deal with + non-ASCII characters, as accepted by the str.encode method. + By default, encoding='utf-8' (characters are encoded with UTF-8), and + errors='strict' (unsupported characters raise a UnicodeEncodeError). + """ + if isinstance(string, str): + if not string: + return string + if encoding is None: + encoding = 'utf-8' + if errors is None: + errors = 'strict' + string = string.encode(encoding, errors) + else: + if encoding is not None: + raise TypeError("quote() doesn't support 'encoding' for bytes") + if errors is not None: + raise TypeError("quote() doesn't support 'errors' for bytes") + return quote_from_bytes(string, safe) + +def quote_plus(string, safe='', encoding=None, errors=None): + """Like quote(), but also replace ' ' with '+', as required for quoting + HTML form values. Plus signs in the original string are escaped unless + they are included in safe. It also does not have safe default to '/'. 
+ """ + # Check if ' ' in string, where string may either be a str or bytes. If + # there are no spaces, the regular quote will produce the right answer. + if ((isinstance(string, str) and ' ' not in string) or + (isinstance(string, bytes) and b' ' not in string)): + return quote(string, safe, encoding, errors) + if isinstance(safe, str): + space = str(' ') + else: + space = bytes(b' ') + string = quote(string, safe + space, encoding, errors) + return string.replace(' ', '+') + +def quote_from_bytes(bs, safe='/'): + """Like quote(), but accepts a bytes object rather than a str, and does + not perform string-to-bytes encoding. It always returns an ASCII string. + quote_from_bytes(b'abc def\x3f') -> 'abc%20def%3f' + """ + if not isinstance(bs, (bytes, bytearray)): + raise TypeError("quote_from_bytes() expected bytes") + if not bs: + return str('') + ### For Python-Future: + bs = bytes(bs) + ### + if isinstance(safe, str): + # Normalize 'safe' by converting to bytes and removing non-ASCII chars + safe = str(safe).encode('ascii', 'ignore') + else: + ### For Python-Future: + safe = bytes(safe) + ### + safe = bytes([c for c in safe if c < 128]) + if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe): + return bs.decode() + try: + quoter = _safe_quoters[safe] + except KeyError: + _safe_quoters[safe] = quoter = Quoter(safe).__getitem__ + return str('').join([quoter(char) for char in bs]) + +def urlencode(query, doseq=False, safe='', encoding=None, errors=None): + """Encode a sequence of two-element tuples or dictionary into a URL query string. + + If any values in the query arg are sequences and doseq is true, each + sequence element is converted to a separate parameter. + + If the query arg is a sequence of two-element tuples, the order of the + parameters in the output will match the order of parameters in the + input. + + The query arg may be either a string or a bytes type. 
When query arg is a + string, the safe, encoding and error parameters are sent the quote_plus for + encoding. + """ + + if hasattr(query, "items"): + query = query.items() + else: + # It's a bother at times that strings and string-like objects are + # sequences. + try: + # non-sequence items should not work with len() + # non-empty strings will fail this + if len(query) and not isinstance(query[0], tuple): + raise TypeError + # Zero-length sequences of all types will get here and succeed, + # but that's a minor nit. Since the original implementation + # allowed empty dicts that type of behavior probably should be + # preserved for consistency + except TypeError: + ty, va, tb = sys.exc_info() + raise_with_traceback(TypeError("not a valid non-string sequence " + "or mapping object"), tb) + + l = [] + if not doseq: + for k, v in query: + if isinstance(k, bytes): + k = quote_plus(k, safe) + else: + k = quote_plus(str(k), safe, encoding, errors) + + if isinstance(v, bytes): + v = quote_plus(v, safe) + else: + v = quote_plus(str(v), safe, encoding, errors) + l.append(k + '=' + v) + else: + for k, v in query: + if isinstance(k, bytes): + k = quote_plus(k, safe) + else: + k = quote_plus(str(k), safe, encoding, errors) + + if isinstance(v, bytes): + v = quote_plus(v, safe) + l.append(k + '=' + v) + elif isinstance(v, str): + v = quote_plus(v, safe, encoding, errors) + l.append(k + '=' + v) + else: + try: + # Is this a sufficient test for sequence-ness? 
+ x = len(v) + except TypeError: + # not a sequence + v = quote_plus(str(v), safe, encoding, errors) + l.append(k + '=' + v) + else: + # loop over the sequence + for elt in v: + if isinstance(elt, bytes): + elt = quote_plus(elt, safe) + else: + elt = quote_plus(str(elt), safe, encoding, errors) + l.append(k + '=' + elt) + return str('&').join(l) + +# Utilities to parse URLs (most of these return None for missing parts): +# unwrap('') --> 'type://host/path' +# splittype('type:opaquestring') --> 'type', 'opaquestring' +# splithost('//host[:port]/path') --> 'host[:port]', '/path' +# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]' +# splitpasswd('user:passwd') -> 'user', 'passwd' +# splitport('host:port') --> 'host', 'port' +# splitquery('/path?query') --> '/path', 'query' +# splittag('/path#tag') --> '/path', 'tag' +# splitattr('/path;attr1=value1;attr2=value2;...') -> +# '/path', ['attr1=value1', 'attr2=value2', ...] +# splitvalue('attr=value') --> 'attr', 'value' +# urllib.parse.unquote('abc%20def') -> 'abc def' +# quote('abc def') -> 'abc%20def') + +def to_bytes(url): + """to_bytes(u"URL") --> 'URL'.""" + # Most URL schemes require ASCII. If that changes, the conversion + # can be relaxed. 
+ # XXX get rid of to_bytes() + if isinstance(url, str): + try: + url = url.encode("ASCII").decode() + except UnicodeError: + raise UnicodeError("URL " + repr(url) + + " contains non-ASCII characters") + return url + +def unwrap(url): + """unwrap('') --> 'type://host/path'.""" + url = str(url).strip() + if url[:1] == '<' and url[-1:] == '>': + url = url[1:-1].strip() + if url[:4] == 'URL:': url = url[4:].strip() + return url + +_typeprog = None +def splittype(url): + """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" + global _typeprog + if _typeprog is None: + import re + _typeprog = re.compile('^([^/:]+):') + + match = _typeprog.match(url) + if match: + scheme = match.group(1) + return scheme.lower(), url[len(scheme) + 1:] + return None, url + +_hostprog = None +def splithost(url): + """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" + global _hostprog + if _hostprog is None: + import re + _hostprog = re.compile('^//([^/?]*)(.*)$') + + match = _hostprog.match(url) + if match: + host_port = match.group(1) + path = match.group(2) + if path and not path.startswith('/'): + path = '/' + path + return host_port, path + return None, url + +_userprog = None +def splituser(host): + """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" + global _userprog + if _userprog is None: + import re + _userprog = re.compile('^(.*)@(.*)$') + + match = _userprog.match(host) + if match: return match.group(1, 2) + return None, host + +_passwdprog = None +def splitpasswd(user): + """splitpasswd('user:passwd') -> 'user', 'passwd'.""" + global _passwdprog + if _passwdprog is None: + import re + _passwdprog = re.compile('^([^:]*):(.*)$',re.S) + + match = _passwdprog.match(user) + if match: return match.group(1, 2) + return user, None + +# splittag('/path#tag') --> '/path', 'tag' +_portprog = None +def splitport(host): + """splitport('host:port') --> 'host', 'port'.""" + global _portprog + if _portprog is None: + import re + _portprog = 
re.compile('^(.*):([0-9]+)$') + + match = _portprog.match(host) + if match: return match.group(1, 2) + return host, None + +_nportprog = None +def splitnport(host, defport=-1): + """Split host and port, returning numeric port. + Return given default port if no ':' found; defaults to -1. + Return numerical port if a valid number are found after ':'. + Return None if ':' but not a valid number.""" + global _nportprog + if _nportprog is None: + import re + _nportprog = re.compile('^(.*):(.*)$') + + match = _nportprog.match(host) + if match: + host, port = match.group(1, 2) + try: + if not port: raise ValueError("no digits") + nport = int(port) + except ValueError: + nport = None + return host, nport + return host, defport + +_queryprog = None +def splitquery(url): + """splitquery('/path?query') --> '/path', 'query'.""" + global _queryprog + if _queryprog is None: + import re + _queryprog = re.compile('^(.*)\?([^?]*)$') + + match = _queryprog.match(url) + if match: return match.group(1, 2) + return url, None + +_tagprog = None +def splittag(url): + """splittag('/path#tag') --> '/path', 'tag'.""" + global _tagprog + if _tagprog is None: + import re + _tagprog = re.compile('^(.*)#([^#]*)$') + + match = _tagprog.match(url) + if match: return match.group(1, 2) + return url, None + +def splitattr(url): + """splitattr('/path;attr1=value1;attr2=value2;...') -> + '/path', ['attr1=value1', 'attr2=value2', ...].""" + words = url.split(';') + return words[0], words[1:] + +_valueprog = None +def splitvalue(attr): + """splitvalue('attr=value') --> 'attr', 'value'.""" + global _valueprog + if _valueprog is None: + import re + _valueprog = re.compile('^([^=]*)=(.*)$') + + match = _valueprog.match(attr) + if match: return match.group(1, 2) + return attr, None diff --git a/src/future/backports/urllib/request.py b/src/future/backports/urllib/request.py new file mode 100644 index 00000000..baee5401 --- /dev/null +++ b/src/future/backports/urllib/request.py @@ -0,0 +1,2647 @@ +""" +Ported 
using Python-Future from the Python 3.3 standard library. + +An extensible library for opening URLs using a variety of protocols + +The simplest way to use this module is to call the urlopen function, +which accepts a string containing a URL or a Request object (described +below). It opens the URL and returns the results as file-like +object; the returned object has some extra methods described below. + +The OpenerDirector manages a collection of Handler objects that do +all the actual work. Each Handler implements a particular protocol or +option. The OpenerDirector is a composite object that invokes the +Handlers needed to open the requested URL. For example, the +HTTPHandler performs HTTP GET and POST requests and deals with +non-error returns. The HTTPRedirectHandler automatically deals with +HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler +deals with digest authentication. + +urlopen(url, data=None) -- Basic usage is the same as original +urllib. pass the url and optionally data to post to an HTTP URL, and +get a file-like object back. One difference is that you can also pass +a Request instance instead of URL. Raises a URLError (subclass of +IOError); for HTTP errors, raises an HTTPError, which can also be +treated as a valid response. + +build_opener -- Function that creates a new OpenerDirector instance. +Will install the default handlers. Accepts one or more Handlers as +arguments, either instances or Handler classes that it will +instantiate. If one of the argument is a subclass of the default +handler, the argument will be installed instead of the default. + +install_opener -- Installs a new opener as the default opener. + +objects of interest: + +OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages +the Handler classes, while dealing with requests and responses. + +Request -- An object that encapsulates the state of a request. The +state can be as simple as the URL. 
It can also include extra HTTP +headers, e.g. a User-Agent. + +BaseHandler -- + +internals: +BaseHandler and parent +_call_chain conventions + +Example usage: + +import urllib.request + +# set up authentication info +authinfo = urllib.request.HTTPBasicAuthHandler() +authinfo.add_password(realm='PDQ Application', + uri='https://mahler:8092/site-updates.py', + user='klem', + passwd='geheim$parole') + +proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"}) + +# build a new opener that adds authentication and caching FTP handlers +opener = urllib.request.build_opener(proxy_support, authinfo, + urllib.request.CacheFTPHandler) + +# install it +urllib.request.install_opener(opener) + +f = urllib.request.urlopen('http://www.python.org/') +""" + +# XXX issues: +# If an authentication error handler that tries to perform +# authentication for some reason but fails, how should the error be +# signalled? The client needs to know the HTTP error code. But if +# the handler knows that the problem was, e.g., that it didn't know +# that hash algo that requested in the challenge, it would be good to +# pass that information along to the client, too. +# ftp errors aren't handled cleanly +# check digest against correct (i.e. 
non-apache) implementation + +# Possible extensions: +# complex proxies XXX not sure what exactly was meant by this +# abstract factory for opener + +from __future__ import absolute_import, division, print_function, unicode_literals +from future.builtins import bytes, dict, filter, input, int, map, open, str +from future.utils import PY2, PY3, raise_with_traceback + +import base64 +import bisect +import hashlib +import array + +from future.backports import email +from future.backports.http import client as http_client +from .error import URLError, HTTPError, ContentTooShortError +from .parse import ( + urlparse, urlsplit, urljoin, unwrap, quote, unquote, + splittype, splithost, splitport, splituser, splitpasswd, + splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse) +from .response import addinfourl, addclosehook + +import io +import os +import posixpath +import re +import socket +import sys +import time +import tempfile +import contextlib +import warnings + +from future.utils import PY2 + +if PY2: + from collections import Iterable +else: + from collections.abc import Iterable + +# check for SSL +try: + import ssl + # Not available in the SSL module in Py2: + from ssl import SSLContext +except ImportError: + _have_ssl = False +else: + _have_ssl = True + +__all__ = [ + # Classes + 'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler', + 'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler', + 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm', + 'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', + 'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', + 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', + 'UnknownHandler', 'HTTPErrorProcessor', + # Functions + 'urlopen', 'install_opener', 'build_opener', + 'pathname2url', 'url2pathname', 'getproxies', + # Legacy interface + 'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener', +] + +# used in User-Agent 
header sent +__version__ = sys.version[:3] + +_opener = None +def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, **_3to2kwargs): + if 'cadefault' in _3to2kwargs: cadefault = _3to2kwargs['cadefault']; del _3to2kwargs['cadefault'] + else: cadefault = False + if 'capath' in _3to2kwargs: capath = _3to2kwargs['capath']; del _3to2kwargs['capath'] + else: capath = None + if 'cafile' in _3to2kwargs: cafile = _3to2kwargs['cafile']; del _3to2kwargs['cafile'] + else: cafile = None + global _opener + if cafile or capath or cadefault: + if not _have_ssl: + raise ValueError('SSL support not available') + context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) + context.options |= ssl.OP_NO_SSLv2 + context.verify_mode = ssl.CERT_REQUIRED + if cafile or capath: + context.load_verify_locations(cafile, capath) + else: + context.set_default_verify_paths() + https_handler = HTTPSHandler(context=context, check_hostname=True) + opener = build_opener(https_handler) + elif _opener is None: + _opener = opener = build_opener() + else: + opener = _opener + return opener.open(url, data, timeout) + +def install_opener(opener): + global _opener + _opener = opener + +_url_tempfiles = [] +def urlretrieve(url, filename=None, reporthook=None, data=None): + """ + Retrieve a URL into a temporary location on disk. + + Requires a URL argument. If a filename is passed, it is used as + the temporary file location. The reporthook argument should be + a callable that accepts a block number, a read size, and the + total file size of the URL target. The data argument should be + valid URL encoded data. + + If a filename is passed and the URL points to a local resource, + the result is a copy from local file to new file. + + Returns a tuple containing the path to the newly created + data file as well as the resulting HTTPMessage object. 
+ """ + url_type, path = splittype(url) + + with contextlib.closing(urlopen(url, data)) as fp: + headers = fp.info() + + # Just return the local path and the "headers" for file:// + # URLs. No sense in performing a copy unless requested. + if url_type == "file" and not filename: + return os.path.normpath(path), headers + + # Handle temporary file setup. + if filename: + tfp = open(filename, 'wb') + else: + tfp = tempfile.NamedTemporaryFile(delete=False) + filename = tfp.name + _url_tempfiles.append(filename) + + with tfp: + result = filename, headers + bs = 1024*8 + size = -1 + read = 0 + blocknum = 0 + if "content-length" in headers: + size = int(headers["Content-Length"]) + + if reporthook: + reporthook(blocknum, bs, size) + + while True: + block = fp.read(bs) + if not block: + break + read += len(block) + tfp.write(block) + blocknum += 1 + if reporthook: + reporthook(blocknum, bs, size) + + if size >= 0 and read < size: + raise ContentTooShortError( + "retrieval incomplete: got only %i out of %i bytes" + % (read, size), result) + + return result + +def urlcleanup(): + for temp_file in _url_tempfiles: + try: + os.unlink(temp_file) + except EnvironmentError: + pass + + del _url_tempfiles[:] + global _opener + if _opener: + _opener = None + +if PY3: + _cut_port_re = re.compile(r":\d+$", re.ASCII) +else: + _cut_port_re = re.compile(r":\d+$") + +def request_host(request): + + """Return request-host, as defined by RFC 2965. + + Variation from RFC: returned value is lowercased, for convenient + comparison. 
+ + """ + url = request.full_url + host = urlparse(url)[1] + if host == "": + host = request.get_header("Host", "") + + # remove port, if present + host = _cut_port_re.sub("", host, 1) + return host.lower() + +class Request(object): + + def __init__(self, url, data=None, headers={}, + origin_req_host=None, unverifiable=False, + method=None): + # unwrap('') --> 'type://host/path' + self.full_url = unwrap(url) + self.full_url, self.fragment = splittag(self.full_url) + self.data = data + self.headers = {} + self._tunnel_host = None + for key, value in headers.items(): + self.add_header(key, value) + self.unredirected_hdrs = {} + if origin_req_host is None: + origin_req_host = request_host(self) + self.origin_req_host = origin_req_host + self.unverifiable = unverifiable + self.method = method + self._parse() + + def _parse(self): + self.type, rest = splittype(self.full_url) + if self.type is None: + raise ValueError("unknown url type: %r" % self.full_url) + self.host, self.selector = splithost(rest) + if self.host: + self.host = unquote(self.host) + + def get_method(self): + """Return a string indicating the HTTP request method.""" + if self.method is not None: + return self.method + elif self.data is not None: + return "POST" + else: + return "GET" + + def get_full_url(self): + if self.fragment: + return '%s#%s' % (self.full_url, self.fragment) + else: + return self.full_url + + # Begin deprecated methods + + def add_data(self, data): + msg = "Request.add_data method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) + self.data = data + + def has_data(self): + msg = "Request.has_data method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) + return self.data is not None + + def get_data(self): + msg = "Request.get_data method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) + return self.data + + def get_type(self): + msg = "Request.get_type method is deprecated." 
+ warnings.warn(msg, DeprecationWarning, stacklevel=1) + return self.type + + def get_host(self): + msg = "Request.get_host method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) + return self.host + + def get_selector(self): + msg = "Request.get_selector method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) + return self.selector + + def is_unverifiable(self): + msg = "Request.is_unverifiable method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) + return self.unverifiable + + def get_origin_req_host(self): + msg = "Request.get_origin_req_host method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) + return self.origin_req_host + + # End deprecated methods + + def set_proxy(self, host, type): + if self.type == 'https' and not self._tunnel_host: + self._tunnel_host = self.host + else: + self.type= type + self.selector = self.full_url + self.host = host + + def has_proxy(self): + return self.selector == self.full_url + + def add_header(self, key, val): + # useful for something like authentication + self.headers[key.capitalize()] = val + + def add_unredirected_header(self, key, val): + # will not be added to a redirected request + self.unredirected_hdrs[key.capitalize()] = val + + def has_header(self, header_name): + return (header_name in self.headers or + header_name in self.unredirected_hdrs) + + def get_header(self, header_name, default=None): + return self.headers.get( + header_name, + self.unredirected_hdrs.get(header_name, default)) + + def header_items(self): + hdrs = self.unredirected_hdrs.copy() + hdrs.update(self.headers) + return list(hdrs.items()) + +class OpenerDirector(object): + def __init__(self): + client_version = "Python-urllib/%s" % __version__ + self.addheaders = [('User-agent', client_version)] + # self.handlers is retained only for backward compatibility + self.handlers = [] + # manage the individual handlers + self.handle_open = {} + self.handle_error 
= {} + self.process_response = {} + self.process_request = {} + + def add_handler(self, handler): + if not hasattr(handler, "add_parent"): + raise TypeError("expected BaseHandler instance, got %r" % + type(handler)) + + added = False + for meth in dir(handler): + if meth in ["redirect_request", "do_open", "proxy_open"]: + # oops, coincidental match + continue + + i = meth.find("_") + protocol = meth[:i] + condition = meth[i+1:] + + if condition.startswith("error"): + j = condition.find("_") + i + 1 + kind = meth[j+1:] + try: + kind = int(kind) + except ValueError: + pass + lookup = self.handle_error.get(protocol, {}) + self.handle_error[protocol] = lookup + elif condition == "open": + kind = protocol + lookup = self.handle_open + elif condition == "response": + kind = protocol + lookup = self.process_response + elif condition == "request": + kind = protocol + lookup = self.process_request + else: + continue + + handlers = lookup.setdefault(kind, []) + if handlers: + bisect.insort(handlers, handler) + else: + handlers.append(handler) + added = True + + if added: + bisect.insort(self.handlers, handler) + handler.add_parent(self) + + def close(self): + # Only exists for backwards compatibility. + pass + + def _call_chain(self, chain, kind, meth_name, *args): + # Handlers raise an exception if no one else should try to handle + # the request, or return None if they can't but another handler + # could. Otherwise, they return the response. + handlers = chain.get(kind, ()) + for handler in handlers: + func = getattr(handler, meth_name) + result = func(*args) + if result is not None: + return result + + def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): + """ + Accept a URL or a Request object + + Python-Future: if the URL is passed as a byte-string, decode it first. 
+ """ + if isinstance(fullurl, bytes): + fullurl = fullurl.decode() + if isinstance(fullurl, str): + req = Request(fullurl, data) + else: + req = fullurl + if data is not None: + req.data = data + + req.timeout = timeout + protocol = req.type + + # pre-process request + meth_name = protocol+"_request" + for processor in self.process_request.get(protocol, []): + meth = getattr(processor, meth_name) + req = meth(req) + + response = self._open(req, data) + + # post-process response + meth_name = protocol+"_response" + for processor in self.process_response.get(protocol, []): + meth = getattr(processor, meth_name) + response = meth(req, response) + + return response + + def _open(self, req, data=None): + result = self._call_chain(self.handle_open, 'default', + 'default_open', req) + if result: + return result + + protocol = req.type + result = self._call_chain(self.handle_open, protocol, protocol + + '_open', req) + if result: + return result + + return self._call_chain(self.handle_open, 'unknown', + 'unknown_open', req) + + def error(self, proto, *args): + if proto in ('http', 'https'): + # XXX http[s] protocols are special-cased + dict = self.handle_error['http'] # https is not different than http + proto = args[2] # YUCK! + meth_name = 'http_error_%s' % proto + http_err = 1 + orig_args = args + else: + dict = self.handle_error + meth_name = proto + '_error' + http_err = 0 + args = (dict, proto, meth_name) + args + result = self._call_chain(*args) + if result: + return result + + if http_err: + args = (dict, 'default', 'http_error_default') + orig_args + return self._call_chain(*args) + +# XXX probably also want an abstract factory that knows when it makes +# sense to skip a superclass in favor of a subclass and when it might +# make sense to include both + +def build_opener(*handlers): + """Create an opener object from a list of handlers. + + The opener will use several default handlers, including support + for HTTP, FTP and when applicable HTTPS. 
+ + If any of the handlers passed as arguments are subclasses of the + default handlers, the default handlers will not be used. + """ + def isclass(obj): + return isinstance(obj, type) or hasattr(obj, "__bases__") + + opener = OpenerDirector() + default_classes = [ProxyHandler, UnknownHandler, HTTPHandler, + HTTPDefaultErrorHandler, HTTPRedirectHandler, + FTPHandler, FileHandler, HTTPErrorProcessor] + if hasattr(http_client, "HTTPSConnection"): + default_classes.append(HTTPSHandler) + skip = set() + for klass in default_classes: + for check in handlers: + if isclass(check): + if issubclass(check, klass): + skip.add(klass) + elif isinstance(check, klass): + skip.add(klass) + for klass in skip: + default_classes.remove(klass) + + for klass in default_classes: + opener.add_handler(klass()) + + for h in handlers: + if isclass(h): + h = h() + opener.add_handler(h) + return opener + +class BaseHandler(object): + handler_order = 500 + + def add_parent(self, parent): + self.parent = parent + + def close(self): + # Only exists for backwards compatibility + pass + + def __lt__(self, other): + if not hasattr(other, "handler_order"): + # Try to preserve the old behavior of having custom classes + # inserted after default ones (works only for custom user + # classes which are not aware of handler_order). + return True + return self.handler_order < other.handler_order + + +class HTTPErrorProcessor(BaseHandler): + """Process HTTP error responses.""" + handler_order = 1000 # after all other processing + + def http_response(self, request, response): + code, msg, hdrs = response.code, response.msg, response.info() + + # According to RFC 2616, "2xx" code indicates that the client's + # request was successfully received, understood, and accepted. 
+ if not (200 <= code < 300): + response = self.parent.error( + 'http', request, response, code, msg, hdrs) + + return response + + https_response = http_response + +class HTTPDefaultErrorHandler(BaseHandler): + def http_error_default(self, req, fp, code, msg, hdrs): + raise HTTPError(req.full_url, code, msg, hdrs, fp) + +class HTTPRedirectHandler(BaseHandler): + # maximum number of redirections to any single URL + # this is needed because of the state that cookies introduce + max_repeats = 4 + # maximum total number of redirections (regardless of URL) before + # assuming we're in a loop + max_redirections = 10 + + def redirect_request(self, req, fp, code, msg, headers, newurl): + """Return a Request or None in response to a redirect. + + This is called by the http_error_30x methods when a + redirection response is received. If a redirection should + take place, return a new Request to allow http_error_30x to + perform the redirect. Otherwise, raise HTTPError if no-one + else should try to handle this url. Return None if you can't + but another Handler might. + """ + m = req.get_method() + if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD") + or code in (301, 302, 303) and m == "POST")): + raise HTTPError(req.full_url, code, msg, headers, fp) + + # Strictly (according to RFC 2616), 301 or 302 in response to + # a POST MUST NOT cause a redirection without confirmation + # from the user (of urllib.request, in this case). In practice, + # essentially all clients do redirect in this case, so we do + # the same. 
+ # be conciliant with URIs containing a space + newurl = newurl.replace(' ', '%20') + CONTENT_HEADERS = ("content-length", "content-type") + newheaders = dict((k, v) for k, v in req.headers.items() + if k.lower() not in CONTENT_HEADERS) + return Request(newurl, + headers=newheaders, + origin_req_host=req.origin_req_host, + unverifiable=True) + + # Implementation note: To avoid the server sending us into an + # infinite loop, the request object needs to track what URLs we + # have already seen. Do this by adding a handler-specific + # attribute to the Request object. + def http_error_302(self, req, fp, code, msg, headers): + # Some servers (incorrectly) return multiple Location headers + # (so probably same goes for URI). Use first header. + if "location" in headers: + newurl = headers["location"] + elif "uri" in headers: + newurl = headers["uri"] + else: + return + + # fix a possible malformed URL + urlparts = urlparse(newurl) + + # For security reasons we don't allow redirection to anything other + # than http, https or ftp. + + if urlparts.scheme not in ('http', 'https', 'ftp', ''): + raise HTTPError( + newurl, code, + "%s - Redirection to url '%s' is not allowed" % (msg, newurl), + headers, fp) + + if not urlparts.path: + urlparts = list(urlparts) + urlparts[2] = "/" + newurl = urlunparse(urlparts) + + newurl = urljoin(req.full_url, newurl) + + # XXX Probably want to forget about the state of the current + # request, although that might interact poorly with other + # handlers that also use handler-specific request attributes + new = self.redirect_request(req, fp, code, msg, headers, newurl) + if new is None: + return + + # loop detection + # .redirect_dict has a key url if url was previously visited. 
+ if hasattr(req, 'redirect_dict'): + visited = new.redirect_dict = req.redirect_dict + if (visited.get(newurl, 0) >= self.max_repeats or + len(visited) >= self.max_redirections): + raise HTTPError(req.full_url, code, + self.inf_msg + msg, headers, fp) + else: + visited = new.redirect_dict = req.redirect_dict = {} + visited[newurl] = visited.get(newurl, 0) + 1 + + # Don't close the fp until we are sure that we won't use it + # with HTTPError. + fp.read() + fp.close() + + return self.parent.open(new, timeout=req.timeout) + + http_error_301 = http_error_303 = http_error_307 = http_error_302 + + inf_msg = "The HTTP server returned a redirect error that would " \ + "lead to an infinite loop.\n" \ + "The last 30x error message was:\n" + + +def _parse_proxy(proxy): + """Return (scheme, user, password, host/port) given a URL or an authority. + + If a URL is supplied, it must have an authority (host:port) component. + According to RFC 3986, having an authority component means the URL must + have two slashes after the scheme: + + >>> _parse_proxy('file:/ftp.example.com/') + Traceback (most recent call last): + ValueError: proxy URL with no authority: 'file:/ftp.example.com/' + + The first three items of the returned tuple may be None. 
+ + Examples of authority parsing: + + >>> _parse_proxy('proxy.example.com') + (None, None, None, 'proxy.example.com') + >>> _parse_proxy('proxy.example.com:3128') + (None, None, None, 'proxy.example.com:3128') + + The authority component may optionally include userinfo (assumed to be + username:password): + + >>> _parse_proxy('joe:password@proxy.example.com') + (None, 'joe', 'password', 'proxy.example.com') + >>> _parse_proxy('joe:password@proxy.example.com:3128') + (None, 'joe', 'password', 'proxy.example.com:3128') + + Same examples, but with URLs instead: + + >>> _parse_proxy('http://proxy.example.com/') + ('http', None, None, 'proxy.example.com') + >>> _parse_proxy('http://proxy.example.com:3128/') + ('http', None, None, 'proxy.example.com:3128') + >>> _parse_proxy('http://joe:password@proxy.example.com/') + ('http', 'joe', 'password', 'proxy.example.com') + >>> _parse_proxy('http://joe:password@proxy.example.com:3128') + ('http', 'joe', 'password', 'proxy.example.com:3128') + + Everything after the authority is ignored: + + >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128') + ('ftp', 'joe', 'password', 'proxy.example.com') + + Test for no trailing '/' case: + + >>> _parse_proxy('http://joe:password@proxy.example.com') + ('http', 'joe', 'password', 'proxy.example.com') + + """ + scheme, r_scheme = splittype(proxy) + if not r_scheme.startswith("/"): + # authority + scheme = None + authority = proxy + else: + # URL + if not r_scheme.startswith("//"): + raise ValueError("proxy URL with no authority: %r" % proxy) + # We have an authority, so for RFC 3986-compliant URLs (by ss 3. 
+ # and 3.3.), path is empty or starts with '/' + end = r_scheme.find("/", 2) + if end == -1: + end = None + authority = r_scheme[2:end] + userinfo, hostport = splituser(authority) + if userinfo is not None: + user, password = splitpasswd(userinfo) + else: + user = password = None + return scheme, user, password, hostport + +class ProxyHandler(BaseHandler): + # Proxies must be in front + handler_order = 100 + + def __init__(self, proxies=None): + if proxies is None: + proxies = getproxies() + assert hasattr(proxies, 'keys'), "proxies must be a mapping" + self.proxies = proxies + for type, url in proxies.items(): + setattr(self, '%s_open' % type, + lambda r, proxy=url, type=type, meth=self.proxy_open: + meth(r, proxy, type)) + + def proxy_open(self, req, proxy, type): + orig_type = req.type + proxy_type, user, password, hostport = _parse_proxy(proxy) + if proxy_type is None: + proxy_type = orig_type + + if req.host and proxy_bypass(req.host): + return None + + if user and password: + user_pass = '%s:%s' % (unquote(user), + unquote(password)) + creds = base64.b64encode(user_pass.encode()).decode("ascii") + req.add_header('Proxy-authorization', 'Basic ' + creds) + hostport = unquote(hostport) + req.set_proxy(hostport, proxy_type) + if orig_type == proxy_type or orig_type == 'https': + # let other handlers take care of it + return None + else: + # need to start over, because the other handlers don't + # grok the proxy's URL type + # e.g. 
if we have a constructor arg proxies like so: + # {'http': 'ftp://proxy.example.com'}, we may end up turning + # a request for http://acme.example.com/a into one for + # ftp://proxy.example.com/a + return self.parent.open(req, timeout=req.timeout) + +class HTTPPasswordMgr(object): + + def __init__(self): + self.passwd = {} + + def add_password(self, realm, uri, user, passwd): + # uri could be a single URI or a sequence + if isinstance(uri, str): + uri = [uri] + if realm not in self.passwd: + self.passwd[realm] = {} + for default_port in True, False: + reduced_uri = tuple( + [self.reduce_uri(u, default_port) for u in uri]) + self.passwd[realm][reduced_uri] = (user, passwd) + + def find_user_password(self, realm, authuri): + domains = self.passwd.get(realm, {}) + for default_port in True, False: + reduced_authuri = self.reduce_uri(authuri, default_port) + for uris, authinfo in domains.items(): + for uri in uris: + if self.is_suburi(uri, reduced_authuri): + return authinfo + return None, None + + def reduce_uri(self, uri, default_port=True): + """Accept authority or URI and extract only the authority and path.""" + # note HTTP URLs do not have a userinfo component + parts = urlsplit(uri) + if parts[1]: + # URI + scheme = parts[0] + authority = parts[1] + path = parts[2] or '/' + else: + # host or host:port + scheme = None + authority = uri + path = '/' + host, port = splitport(authority) + if default_port and port is None and scheme is not None: + dport = {"http": 80, + "https": 443, + }.get(scheme) + if dport is not None: + authority = "%s:%d" % (host, dport) + return authority, path + + def is_suburi(self, base, test): + """Check if test is below base in a URI tree + + Both args must be URIs in reduced form. 
+ """ + if base == test: + return True + if base[0] != test[0]: + return False + common = posixpath.commonprefix((base[1], test[1])) + if len(common) == len(base[1]): + return True + return False + + +class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr): + + def find_user_password(self, realm, authuri): + user, password = HTTPPasswordMgr.find_user_password(self, realm, + authuri) + if user is not None: + return user, password + return HTTPPasswordMgr.find_user_password(self, None, authuri) + + +class AbstractBasicAuthHandler(object): + + # XXX this allows for multiple auth-schemes, but will stupidly pick + # the last one with a realm specified. + + # allow for double- and single-quoted realm values + # (single quotes are a violation of the RFC, but appear in the wild) + rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+' + 'realm=(["\']?)([^"\']*)\\2', re.I) + + # XXX could pre-emptively send auth info already accepted (RFC 2617, + # end of section 2, and section 1.2 immediately after "credentials" + # production). + + def __init__(self, password_mgr=None): + if password_mgr is None: + password_mgr = HTTPPasswordMgr() + self.passwd = password_mgr + self.add_password = self.passwd.add_password + self.retried = 0 + + def reset_retry_count(self): + self.retried = 0 + + def http_error_auth_reqed(self, authreq, host, req, headers): + # host may be an authority (without userinfo) or a URL with an + # authority + # XXX could be multiple headers + authreq = headers.get(authreq, None) + + if self.retried > 5: + # retry sending the username:password 5 times before failing. 
+ raise HTTPError(req.get_full_url(), 401, "basic auth failed", + headers, None) + else: + self.retried += 1 + + if authreq: + scheme = authreq.split()[0] + if scheme.lower() != 'basic': + raise ValueError("AbstractBasicAuthHandler does not" + " support the following scheme: '%s'" % + scheme) + else: + mo = AbstractBasicAuthHandler.rx.search(authreq) + if mo: + scheme, quote, realm = mo.groups() + if quote not in ['"',"'"]: + warnings.warn("Basic Auth Realm was unquoted", + UserWarning, 2) + if scheme.lower() == 'basic': + response = self.retry_http_basic_auth(host, req, realm) + if response and response.code != 401: + self.retried = 0 + return response + + def retry_http_basic_auth(self, host, req, realm): + user, pw = self.passwd.find_user_password(realm, host) + if pw is not None: + raw = "%s:%s" % (user, pw) + auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii") + if req.headers.get(self.auth_header, None) == auth: + return None + req.add_unredirected_header(self.auth_header, auth) + return self.parent.open(req, timeout=req.timeout) + else: + return None + + +class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): + + auth_header = 'Authorization' + + def http_error_401(self, req, fp, code, msg, headers): + url = req.full_url + response = self.http_error_auth_reqed('www-authenticate', + url, req, headers) + self.reset_retry_count() + return response + + +class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): + + auth_header = 'Proxy-authorization' + + def http_error_407(self, req, fp, code, msg, headers): + # http_error_auth_reqed requires that there is no userinfo component in + # authority. Assume there isn't one, since urllib.request does not (and + # should not, RFC 3986 s. 3.2.1) support requests for URLs containing + # userinfo. + authority = req.host + response = self.http_error_auth_reqed('proxy-authenticate', + authority, req, headers) + self.reset_retry_count() + return response + + +# Return n random bytes. 
+_randombytes = os.urandom + + +class AbstractDigestAuthHandler(object): + # Digest authentication is specified in RFC 2617. + + # XXX The client does not inspect the Authentication-Info header + # in a successful response. + + # XXX It should be possible to test this implementation against + # a mock server that just generates a static set of challenges. + + # XXX qop="auth-int" supports is shaky + + def __init__(self, passwd=None): + if passwd is None: + passwd = HTTPPasswordMgr() + self.passwd = passwd + self.add_password = self.passwd.add_password + self.retried = 0 + self.nonce_count = 0 + self.last_nonce = None + + def reset_retry_count(self): + self.retried = 0 + + def http_error_auth_reqed(self, auth_header, host, req, headers): + authreq = headers.get(auth_header, None) + if self.retried > 5: + # Don't fail endlessly - if we failed once, we'll probably + # fail a second time. Hm. Unless the Password Manager is + # prompting for the information. Crap. This isn't great + # but it's better than the current 'repeat until recursion + # depth exceeded' approach + raise HTTPError(req.full_url, 401, "digest auth failed", + headers, None) + else: + self.retried += 1 + if authreq: + scheme = authreq.split()[0] + if scheme.lower() == 'digest': + return self.retry_http_digest_auth(req, authreq) + elif scheme.lower() != 'basic': + raise ValueError("AbstractDigestAuthHandler does not support" + " the following scheme: '%s'" % scheme) + + def retry_http_digest_auth(self, req, auth): + token, challenge = auth.split(' ', 1) + chal = parse_keqv_list(filter(None, parse_http_list(challenge))) + auth = self.get_authorization(req, chal) + if auth: + auth_val = 'Digest %s' % auth + if req.headers.get(self.auth_header, None) == auth_val: + return None + req.add_unredirected_header(self.auth_header, auth_val) + resp = self.parent.open(req, timeout=req.timeout) + return resp + + def get_cnonce(self, nonce): + # The cnonce-value is an opaque + # quoted string value provided by the 
client and used by both client + # and server to avoid chosen plaintext attacks, to provide mutual + # authentication, and to provide some message integrity protection. + # This isn't a fabulous effort, but it's probably Good Enough. + s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime()) + b = s.encode("ascii") + _randombytes(8) + dig = hashlib.sha1(b).hexdigest() + return dig[:16] + + def get_authorization(self, req, chal): + try: + realm = chal['realm'] + nonce = chal['nonce'] + qop = chal.get('qop') + algorithm = chal.get('algorithm', 'MD5') + # mod_digest doesn't send an opaque, even though it isn't + # supposed to be optional + opaque = chal.get('opaque', None) + except KeyError: + return None + + H, KD = self.get_algorithm_impls(algorithm) + if H is None: + return None + + user, pw = self.passwd.find_user_password(realm, req.full_url) + if user is None: + return None + + # XXX not implemented yet + if req.data is not None: + entdig = self.get_entity_digest(req.data, chal) + else: + entdig = None + + A1 = "%s:%s:%s" % (user, realm, pw) + A2 = "%s:%s" % (req.get_method(), + # XXX selector: what about proxies and full urls + req.selector) + if qop == 'auth': + if nonce == self.last_nonce: + self.nonce_count += 1 + else: + self.nonce_count = 1 + self.last_nonce = nonce + ncvalue = '%08x' % self.nonce_count + cnonce = self.get_cnonce(nonce) + noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2)) + respdig = KD(H(A1), noncebit) + elif qop is None: + respdig = KD(H(A1), "%s:%s" % (nonce, H(A2))) + else: + # XXX handle auth-int. + raise URLError("qop '%s' is not supported." % qop) + + # XXX should the partial digests be encoded too? 
+ + base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \ + 'response="%s"' % (user, realm, nonce, req.selector, + respdig) + if opaque: + base += ', opaque="%s"' % opaque + if entdig: + base += ', digest="%s"' % entdig + base += ', algorithm="%s"' % algorithm + if qop: + base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce) + return base + + def get_algorithm_impls(self, algorithm): + # lambdas assume digest modules are imported at the top level + if algorithm == 'MD5': + H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest() + elif algorithm == 'SHA': + H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest() + # XXX MD5-sess + KD = lambda s, d: H("%s:%s" % (s, d)) + return H, KD + + def get_entity_digest(self, data, chal): + # XXX not implemented yet + return None + + +class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): + """An authentication protocol defined by RFC 2069 + + Digest authentication improves on basic authentication because it + does not transmit passwords in the clear. 
+ """ + + auth_header = 'Authorization' + handler_order = 490 # before Basic auth + + def http_error_401(self, req, fp, code, msg, headers): + host = urlparse(req.full_url)[1] + retry = self.http_error_auth_reqed('www-authenticate', + host, req, headers) + self.reset_retry_count() + return retry + + +class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): + + auth_header = 'Proxy-Authorization' + handler_order = 490 # before Basic auth + + def http_error_407(self, req, fp, code, msg, headers): + host = req.host + retry = self.http_error_auth_reqed('proxy-authenticate', + host, req, headers) + self.reset_retry_count() + return retry + +class AbstractHTTPHandler(BaseHandler): + + def __init__(self, debuglevel=0): + self._debuglevel = debuglevel + + def set_http_debuglevel(self, level): + self._debuglevel = level + + def do_request_(self, request): + host = request.host + if not host: + raise URLError('no host given') + + if request.data is not None: # POST + data = request.data + if isinstance(data, str): + msg = "POST data should be bytes or an iterable of bytes. " \ + "It cannot be of type str." + raise TypeError(msg) + if not request.has_header('Content-type'): + request.add_unredirected_header( + 'Content-type', + 'application/x-www-form-urlencoded') + if not request.has_header('Content-length'): + size = None + try: + ### For Python-Future: + if PY2 and isinstance(data, array.array): + # memoryviews of arrays aren't supported + # in Py2.7. (e.g. memoryview(array.array('I', + # [1, 2, 3, 4])) raises a TypeError.) 
+ # So we calculate the size manually instead: + size = len(data) * data.itemsize + ### + else: + mv = memoryview(data) + size = len(mv) * mv.itemsize + except TypeError: + if isinstance(data, Iterable): + raise ValueError("Content-Length should be specified " + "for iterable data of type %r %r" % (type(data), + data)) + else: + request.add_unredirected_header( + 'Content-length', '%d' % size) + + sel_host = host + if request.has_proxy(): + scheme, sel = splittype(request.selector) + sel_host, sel_path = splithost(sel) + if not request.has_header('Host'): + request.add_unredirected_header('Host', sel_host) + for name, value in self.parent.addheaders: + name = name.capitalize() + if not request.has_header(name): + request.add_unredirected_header(name, value) + + return request + + def do_open(self, http_class, req, **http_conn_args): + """Return an HTTPResponse object for the request, using http_class. + + http_class must implement the HTTPConnection API from http.client. + """ + host = req.host + if not host: + raise URLError('no host given') + + # will parse host:port + h = http_class(host, timeout=req.timeout, **http_conn_args) + + headers = dict(req.unredirected_hdrs) + headers.update(dict((k, v) for k, v in req.headers.items() + if k not in headers)) + + # TODO(jhylton): Should this be redesigned to handle + # persistent connections? + + # We want to make an HTTP/1.1 request, but the addinfourl + # class isn't prepared to deal with a persistent connection. + # It will try to read all remaining data from the socket, + # which will block while the server waits for the next request. + # So make sure the connection gets closed after the (only) + # request. 
+ headers["Connection"] = "close" + headers = dict((name.title(), val) for name, val in headers.items()) + + if req._tunnel_host: + tunnel_headers = {} + proxy_auth_hdr = "Proxy-Authorization" + if proxy_auth_hdr in headers: + tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr] + # Proxy-Authorization should not be sent to origin + # server. + del headers[proxy_auth_hdr] + h.set_tunnel(req._tunnel_host, headers=tunnel_headers) + + try: + h.request(req.get_method(), req.selector, req.data, headers) + except socket.error as err: # timeout error + h.close() + raise URLError(err) + else: + r = h.getresponse() + # If the server does not send us a 'Connection: close' header, + # HTTPConnection assumes the socket should be left open. Manually + # mark the socket to be closed when this response object goes away. + if h.sock: + h.sock.close() + h.sock = None + + + r.url = req.get_full_url() + # This line replaces the .msg attribute of the HTTPResponse + # with .headers, because urllib clients expect the response to + # have the reason in .msg. It would be good to mark this + # attribute is deprecated and get then to use info() or + # .headers. 
+ r.msg = r.reason + return r + + +class HTTPHandler(AbstractHTTPHandler): + + def http_open(self, req): + return self.do_open(http_client.HTTPConnection, req) + + http_request = AbstractHTTPHandler.do_request_ + +if hasattr(http_client, 'HTTPSConnection'): + + class HTTPSHandler(AbstractHTTPHandler): + + def __init__(self, debuglevel=0, context=None, check_hostname=None): + AbstractHTTPHandler.__init__(self, debuglevel) + self._context = context + self._check_hostname = check_hostname + + def https_open(self, req): + return self.do_open(http_client.HTTPSConnection, req, + context=self._context, check_hostname=self._check_hostname) + + https_request = AbstractHTTPHandler.do_request_ + + __all__.append('HTTPSHandler') + +class HTTPCookieProcessor(BaseHandler): + def __init__(self, cookiejar=None): + import future.backports.http.cookiejar as http_cookiejar + if cookiejar is None: + cookiejar = http_cookiejar.CookieJar() + self.cookiejar = cookiejar + + def http_request(self, request): + self.cookiejar.add_cookie_header(request) + return request + + def http_response(self, request, response): + self.cookiejar.extract_cookies(response, request) + return response + + https_request = http_request + https_response = http_response + +class UnknownHandler(BaseHandler): + def unknown_open(self, req): + type = req.type + raise URLError('unknown url type: %s' % type) + +def parse_keqv_list(l): + """Parse list of key=value strings where keys are not duplicated.""" + parsed = {} + for elt in l: + k, v = elt.split('=', 1) + if v[0] == '"' and v[-1] == '"': + v = v[1:-1] + parsed[k] = v + return parsed + +def parse_http_list(s): + """Parse lists as described by RFC 2068 Section 2. + + In particular, parse comma-separated lists where the elements of + the list may include quoted-strings. A quoted-string could + contain a comma. A non-quoted string could have quotes in the + middle. Neither commas nor quotes count if they are escaped. + Only double-quotes count, not single-quotes. 
+ """ + res = [] + part = '' + + escape = quote = False + for cur in s: + if escape: + part += cur + escape = False + continue + if quote: + if cur == '\\': + escape = True + continue + elif cur == '"': + quote = False + part += cur + continue + + if cur == ',': + res.append(part) + part = '' + continue + + if cur == '"': + quote = True + + part += cur + + # append last part + if part: + res.append(part) + + return [part.strip() for part in res] + +class FileHandler(BaseHandler): + # Use local file or FTP depending on form of URL + def file_open(self, req): + url = req.selector + if url[:2] == '//' and url[2:3] != '/' and (req.host and + req.host != 'localhost'): + if not req.host is self.get_names(): + raise URLError("file:// scheme is supported only on localhost") + else: + return self.open_local_file(req) + + # names for the localhost + names = None + def get_names(self): + if FileHandler.names is None: + try: + FileHandler.names = tuple( + socket.gethostbyname_ex('localhost')[2] + + socket.gethostbyname_ex(socket.gethostname())[2]) + except socket.gaierror: + FileHandler.names = (socket.gethostbyname('localhost'),) + return FileHandler.names + + # not entirely sure what the rules are here + def open_local_file(self, req): + import future.backports.email.utils as email_utils + import mimetypes + host = req.host + filename = req.selector + localfile = url2pathname(filename) + try: + stats = os.stat(localfile) + size = stats.st_size + modified = email_utils.formatdate(stats.st_mtime, usegmt=True) + mtype = mimetypes.guess_type(filename)[0] + headers = email.message_from_string( + 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' % + (mtype or 'text/plain', size, modified)) + if host: + host, port = splitport(host) + if not host or \ + (not port and _safe_gethostbyname(host) in self.get_names()): + if host: + origurl = 'file://' + host + filename + else: + origurl = 'file://' + filename + return addinfourl(open(localfile, 'rb'), headers, origurl) + except 
OSError as exp: + # users shouldn't expect OSErrors coming from urlopen() + raise URLError(exp) + raise URLError('file not on local host') + +def _safe_gethostbyname(host): + try: + return socket.gethostbyname(host) + except socket.gaierror: + return None + +class FTPHandler(BaseHandler): + def ftp_open(self, req): + import ftplib + import mimetypes + host = req.host + if not host: + raise URLError('ftp error: no host given') + host, port = splitport(host) + if port is None: + port = ftplib.FTP_PORT + else: + port = int(port) + + # username/password handling + user, host = splituser(host) + if user: + user, passwd = splitpasswd(user) + else: + passwd = None + host = unquote(host) + user = user or '' + passwd = passwd or '' + + try: + host = socket.gethostbyname(host) + except socket.error as msg: + raise URLError(msg) + path, attrs = splitattr(req.selector) + dirs = path.split('/') + dirs = list(map(unquote, dirs)) + dirs, file = dirs[:-1], dirs[-1] + if dirs and not dirs[0]: + dirs = dirs[1:] + try: + fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout) + type = file and 'I' or 'D' + for attr in attrs: + attr, value = splitvalue(attr) + if attr.lower() == 'type' and \ + value in ('a', 'A', 'i', 'I', 'd', 'D'): + type = value.upper() + fp, retrlen = fw.retrfile(file, type) + headers = "" + mtype = mimetypes.guess_type(req.full_url)[0] + if mtype: + headers += "Content-type: %s\n" % mtype + if retrlen is not None and retrlen >= 0: + headers += "Content-length: %d\n" % retrlen + headers = email.message_from_string(headers) + return addinfourl(fp, headers, req.full_url) + except ftplib.all_errors as exp: + exc = URLError('ftp error: %r' % exp) + raise_with_traceback(exc) + + def connect_ftp(self, user, passwd, host, port, dirs, timeout): + return ftpwrapper(user, passwd, host, port, dirs, timeout, + persistent=False) + +class CacheFTPHandler(FTPHandler): + # XXX would be nice to have pluggable cache strategies + # XXX this stuff is definitely not thread 
safe + def __init__(self): + self.cache = {} + self.timeout = {} + self.soonest = 0 + self.delay = 60 + self.max_conns = 16 + + def setTimeout(self, t): + self.delay = t + + def setMaxConns(self, m): + self.max_conns = m + + def connect_ftp(self, user, passwd, host, port, dirs, timeout): + key = user, host, port, '/'.join(dirs), timeout + if key in self.cache: + self.timeout[key] = time.time() + self.delay + else: + self.cache[key] = ftpwrapper(user, passwd, host, port, + dirs, timeout) + self.timeout[key] = time.time() + self.delay + self.check_cache() + return self.cache[key] + + def check_cache(self): + # first check for old ones + t = time.time() + if self.soonest <= t: + for k, v in list(self.timeout.items()): + if v < t: + self.cache[k].close() + del self.cache[k] + del self.timeout[k] + self.soonest = min(list(self.timeout.values())) + + # then check the size + if len(self.cache) == self.max_conns: + for k, v in list(self.timeout.items()): + if v == self.soonest: + del self.cache[k] + del self.timeout[k] + break + self.soonest = min(list(self.timeout.values())) + + def clear_cache(self): + for conn in self.cache.values(): + conn.close() + self.cache.clear() + self.timeout.clear() + + +# Code move from the old urllib module + +MAXFTPCACHE = 10 # Trim the ftp cache beyond this size + +# Helper for non-unix systems +if os.name == 'nt': + from nturl2path import url2pathname, pathname2url +else: + def url2pathname(pathname): + """OS-specific conversion from a relative URL of the 'file' scheme + to a file system path; not recommended for general use.""" + return unquote(pathname) + + def pathname2url(pathname): + """OS-specific conversion from a file system path to a relative URL + of the 'file' scheme; not recommended for general use.""" + return quote(pathname) + +# This really consists of two pieces: +# (1) a class which handles opening of all sorts of URLs +# (plus assorted utilities etc.) 
+# (2) a set of functions for parsing URLs +# XXX Should these be separated out into different modules? + + +ftpcache = {} +class URLopener(object): + """Class to open URLs. + This is a class rather than just a subroutine because we may need + more than one set of global protocol-specific options. + Note -- this is a base class for those who don't want the + automatic handling of errors type 302 (relocated) and 401 + (authorization needed).""" + + __tempfiles = None + + version = "Python-urllib/%s" % __version__ + + # Constructor + def __init__(self, proxies=None, **x509): + msg = "%(class)s style of invoking requests is deprecated. " \ + "Use newer urlopen functions/methods" % {'class': self.__class__.__name__} + warnings.warn(msg, DeprecationWarning, stacklevel=3) + if proxies is None: + proxies = getproxies() + assert hasattr(proxies, 'keys'), "proxies must be a mapping" + self.proxies = proxies + self.key_file = x509.get('key_file') + self.cert_file = x509.get('cert_file') + self.addheaders = [('User-Agent', self.version)] + self.__tempfiles = [] + self.__unlink = os.unlink # See cleanup() + self.tempcache = None + # Undocumented feature: if you assign {} to tempcache, + # it is used to cache files retrieved with + # self.retrieve(). This is not enabled by default + # since it does not work for changing documents (and I + # haven't got the logic to check expiration headers + # yet). + self.ftpcache = ftpcache + # Undocumented feature: you can use a different + # ftp cache by assigning to the .ftpcache member; + # in case you want logically independent URL openers + # XXX This is not threadsafe. Bah. + + def __del__(self): + self.close() + + def close(self): + self.cleanup() + + def cleanup(self): + # This code sometimes runs when the rest of this module + # has already been deleted, so it can't use any globals + # or import anything. 
+ if self.__tempfiles: + for file in self.__tempfiles: + try: + self.__unlink(file) + except OSError: + pass + del self.__tempfiles[:] + if self.tempcache: + self.tempcache.clear() + + def addheader(self, *args): + """Add a header to be used by the HTTP interface only + e.g. u.addheader('Accept', 'sound/basic')""" + self.addheaders.append(args) + + # External interface + def open(self, fullurl, data=None): + """Use URLopener().open(file) instead of open(file, 'r').""" + fullurl = unwrap(to_bytes(fullurl)) + fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|") + if self.tempcache and fullurl in self.tempcache: + filename, headers = self.tempcache[fullurl] + fp = open(filename, 'rb') + return addinfourl(fp, headers, fullurl) + urltype, url = splittype(fullurl) + if not urltype: + urltype = 'file' + if urltype in self.proxies: + proxy = self.proxies[urltype] + urltype, proxyhost = splittype(proxy) + host, selector = splithost(proxyhost) + url = (host, fullurl) # Signal special case to open_*() + else: + proxy = None + name = 'open_' + urltype + self.type = urltype + name = name.replace('-', '_') + if not hasattr(self, name): + if proxy: + return self.open_unknown_proxy(proxy, fullurl, data) + else: + return self.open_unknown(fullurl, data) + try: + if data is None: + return getattr(self, name)(url) + else: + return getattr(self, name)(url, data) + except HTTPError: + raise + except socket.error as msg: + raise_with_traceback(IOError('socket error', msg)) + + def open_unknown(self, fullurl, data=None): + """Overridable interface to open unknown URL type.""" + type, url = splittype(fullurl) + raise IOError('url error', 'unknown url type', type) + + def open_unknown_proxy(self, proxy, fullurl, data=None): + """Overridable interface to open unknown URL type.""" + type, url = splittype(fullurl) + raise IOError('url error', 'invalid proxy for %s' % type, proxy) + + # External interface + def retrieve(self, url, filename=None, reporthook=None, data=None): + 
"""retrieve(url) returns (filename, headers) for a local object + or (tempfilename, headers) for a remote object.""" + url = unwrap(to_bytes(url)) + if self.tempcache and url in self.tempcache: + return self.tempcache[url] + type, url1 = splittype(url) + if filename is None and (not type or type == 'file'): + try: + fp = self.open_local_file(url1) + hdrs = fp.info() + fp.close() + return url2pathname(splithost(url1)[1]), hdrs + except IOError as msg: + pass + fp = self.open(url, data) + try: + headers = fp.info() + if filename: + tfp = open(filename, 'wb') + else: + import tempfile + garbage, path = splittype(url) + garbage, path = splithost(path or "") + path, garbage = splitquery(path or "") + path, garbage = splitattr(path or "") + suffix = os.path.splitext(path)[1] + (fd, filename) = tempfile.mkstemp(suffix) + self.__tempfiles.append(filename) + tfp = os.fdopen(fd, 'wb') + try: + result = filename, headers + if self.tempcache is not None: + self.tempcache[url] = result + bs = 1024*8 + size = -1 + read = 0 + blocknum = 0 + if "content-length" in headers: + size = int(headers["Content-Length"]) + if reporthook: + reporthook(blocknum, bs, size) + while 1: + block = fp.read(bs) + if not block: + break + read += len(block) + tfp.write(block) + blocknum += 1 + if reporthook: + reporthook(blocknum, bs, size) + finally: + tfp.close() + finally: + fp.close() + + # raise exception if actual size does not match content-length header + if size >= 0 and read < size: + raise ContentTooShortError( + "retrieval incomplete: got only %i out of %i bytes" + % (read, size), result) + + return result + + # Each method named open_ knows how to open that type of URL + + def _open_generic_http(self, connection_factory, url, data): + """Make an HTTP connection using connection_class. + + This is an internal method that should be called from + open_http() or open_https(). + + Arguments: + - connection_factory should take a host name and return an + HTTPConnection instance. 
+ - url is the url to retrieval or a host, relative-path pair. + - data is payload for a POST request or None. + """ + + user_passwd = None + proxy_passwd= None + if isinstance(url, str): + host, selector = splithost(url) + if host: + user_passwd, host = splituser(host) + host = unquote(host) + realhost = host + else: + host, selector = url + # check whether the proxy contains authorization information + proxy_passwd, host = splituser(host) + # now we proceed with the url we want to obtain + urltype, rest = splittype(selector) + url = rest + user_passwd = None + if urltype.lower() != 'http': + realhost = None + else: + realhost, rest = splithost(rest) + if realhost: + user_passwd, realhost = splituser(realhost) + if user_passwd: + selector = "%s://%s%s" % (urltype, realhost, rest) + if proxy_bypass(realhost): + host = realhost + + if not host: raise IOError('http error', 'no host given') + + if proxy_passwd: + proxy_passwd = unquote(proxy_passwd) + proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii') + else: + proxy_auth = None + + if user_passwd: + user_passwd = unquote(user_passwd) + auth = base64.b64encode(user_passwd.encode()).decode('ascii') + else: + auth = None + http_conn = connection_factory(host) + headers = {} + if proxy_auth: + headers["Proxy-Authorization"] = "Basic %s" % proxy_auth + if auth: + headers["Authorization"] = "Basic %s" % auth + if realhost: + headers["Host"] = realhost + + # Add Connection:close as we don't support persistent connections yet. 
+ # This helps in closing the socket and avoiding ResourceWarning + + headers["Connection"] = "close" + + for header, value in self.addheaders: + headers[header] = value + + if data is not None: + headers["Content-Type"] = "application/x-www-form-urlencoded" + http_conn.request("POST", selector, data, headers) + else: + http_conn.request("GET", selector, headers=headers) + + try: + response = http_conn.getresponse() + except http_client.BadStatusLine: + # something went wrong with the HTTP status line + raise URLError("http protocol error: bad status line") + + # According to RFC 2616, "2xx" code indicates that the client's + # request was successfully received, understood, and accepted. + if 200 <= response.status < 300: + return addinfourl(response, response.msg, "http:" + url, + response.status) + else: + return self.http_error( + url, response.fp, + response.status, response.reason, response.msg, data) + + def open_http(self, url, data=None): + """Use HTTP protocol.""" + return self._open_generic_http(http_client.HTTPConnection, url, data) + + def http_error(self, url, fp, errcode, errmsg, headers, data=None): + """Handle http errors. 
+ + Derived class can override this, or provide specific handlers + named http_error_DDD where DDD is the 3-digit error code.""" + # First check if there's a specific handler for this error + name = 'http_error_%d' % errcode + if hasattr(self, name): + method = getattr(self, name) + if data is None: + result = method(url, fp, errcode, errmsg, headers) + else: + result = method(url, fp, errcode, errmsg, headers, data) + if result: return result + return self.http_error_default(url, fp, errcode, errmsg, headers) + + def http_error_default(self, url, fp, errcode, errmsg, headers): + """Default error handler: close the connection and raise IOError.""" + fp.close() + raise HTTPError(url, errcode, errmsg, headers, None) + + if _have_ssl: + def _https_connection(self, host): + return http_client.HTTPSConnection(host, + key_file=self.key_file, + cert_file=self.cert_file) + + def open_https(self, url, data=None): + """Use HTTPS protocol.""" + return self._open_generic_http(self._https_connection, url, data) + + def open_file(self, url): + """Use local file or FTP depending on form of URL.""" + if not isinstance(url, str): + raise URLError('file error: proxy support for file protocol currently not implemented') + if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/': + raise ValueError("file:// scheme is supported only on localhost") + else: + return self.open_local_file(url) + + def open_local_file(self, url): + """Use local file.""" + import future.backports.email.utils as email_utils + import mimetypes + host, file = splithost(url) + localname = url2pathname(file) + try: + stats = os.stat(localname) + except OSError as e: + raise URLError(e.strerror, e.filename) + size = stats.st_size + modified = email_utils.formatdate(stats.st_mtime, usegmt=True) + mtype = mimetypes.guess_type(url)[0] + headers = email.message_from_string( + 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % + (mtype or 'text/plain', size, modified)) + if not host: + 
urlfile = file + if file[:1] == '/': + urlfile = 'file://' + file + return addinfourl(open(localname, 'rb'), headers, urlfile) + host, port = splitport(host) + if (not port + and socket.gethostbyname(host) in ((localhost(),) + thishost())): + urlfile = file + if file[:1] == '/': + urlfile = 'file://' + file + elif file[:2] == './': + raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url) + return addinfourl(open(localname, 'rb'), headers, urlfile) + raise URLError('local file error: not on local host') + + def open_ftp(self, url): + """Use FTP protocol.""" + if not isinstance(url, str): + raise URLError('ftp error: proxy support for ftp protocol currently not implemented') + import mimetypes + host, path = splithost(url) + if not host: raise URLError('ftp error: no host given') + host, port = splitport(host) + user, host = splituser(host) + if user: user, passwd = splitpasswd(user) + else: passwd = None + host = unquote(host) + user = unquote(user or '') + passwd = unquote(passwd or '') + host = socket.gethostbyname(host) + if not port: + import ftplib + port = ftplib.FTP_PORT + else: + port = int(port) + path, attrs = splitattr(path) + path = unquote(path) + dirs = path.split('/') + dirs, file = dirs[:-1], dirs[-1] + if dirs and not dirs[0]: dirs = dirs[1:] + if dirs and not dirs[0]: dirs[0] = '/' + key = user, host, port, '/'.join(dirs) + # XXX thread unsafe! 
+ if len(self.ftpcache) > MAXFTPCACHE: + # Prune the cache, rather arbitrarily + for k in self.ftpcache.keys(): + if k != key: + v = self.ftpcache[k] + del self.ftpcache[k] + v.close() + try: + if key not in self.ftpcache: + self.ftpcache[key] = \ + ftpwrapper(user, passwd, host, port, dirs) + if not file: type = 'D' + else: type = 'I' + for attr in attrs: + attr, value = splitvalue(attr) + if attr.lower() == 'type' and \ + value in ('a', 'A', 'i', 'I', 'd', 'D'): + type = value.upper() + (fp, retrlen) = self.ftpcache[key].retrfile(file, type) + mtype = mimetypes.guess_type("ftp:" + url)[0] + headers = "" + if mtype: + headers += "Content-Type: %s\n" % mtype + if retrlen is not None and retrlen >= 0: + headers += "Content-Length: %d\n" % retrlen + headers = email.message_from_string(headers) + return addinfourl(fp, headers, "ftp:" + url) + except ftperrors() as exp: + raise_with_traceback(URLError('ftp error %r' % exp)) + + def open_data(self, url, data=None): + """Use "data" URL.""" + if not isinstance(url, str): + raise URLError('data error: proxy support for data protocol currently not implemented') + # ignore POSTed data + # + # syntax of data URLs: + # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data + # mediatype := [ type "/" subtype ] *( ";" parameter ) + # data := *urlchar + # parameter := attribute "=" value + try: + [type, data] = url.split(',', 1) + except ValueError: + raise IOError('data error', 'bad data URL') + if not type: + type = 'text/plain;charset=US-ASCII' + semi = type.rfind(';') + if semi >= 0 and '=' not in type[semi:]: + encoding = type[semi+1:] + type = type[:semi] + else: + encoding = '' + msg = [] + msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT', + time.gmtime(time.time()))) + msg.append('Content-type: %s' % type) + if encoding == 'base64': + # XXX is this encoding/decoding ok? 
+ data = base64.decodebytes(data.encode('ascii')).decode('latin-1') + else: + data = unquote(data) + msg.append('Content-Length: %d' % len(data)) + msg.append('') + msg.append(data) + msg = '\n'.join(msg) + headers = email.message_from_string(msg) + f = io.StringIO(msg) + #f.fileno = None # needed for addinfourl + return addinfourl(f, headers, url) + + +class FancyURLopener(URLopener): + """Derived class with handlers for errors we can handle (perhaps).""" + + def __init__(self, *args, **kwargs): + URLopener.__init__(self, *args, **kwargs) + self.auth_cache = {} + self.tries = 0 + self.maxtries = 10 + + def http_error_default(self, url, fp, errcode, errmsg, headers): + """Default error handling -- don't raise an exception.""" + return addinfourl(fp, headers, "http:" + url, errcode) + + def http_error_302(self, url, fp, errcode, errmsg, headers, data=None): + """Error 302 -- relocated (temporarily).""" + self.tries += 1 + if self.maxtries and self.tries >= self.maxtries: + if hasattr(self, "http_error_500"): + meth = self.http_error_500 + else: + meth = self.http_error_default + self.tries = 0 + return meth(url, fp, 500, + "Internal Server Error: Redirect Recursion", headers) + result = self.redirect_internal(url, fp, errcode, errmsg, headers, + data) + self.tries = 0 + return result + + def redirect_internal(self, url, fp, errcode, errmsg, headers, data): + if 'location' in headers: + newurl = headers['location'] + elif 'uri' in headers: + newurl = headers['uri'] + else: + return + fp.close() + + # In case the server sent a relative URL, join with original: + newurl = urljoin(self.type + ":" + url, newurl) + + urlparts = urlparse(newurl) + + # For security reasons, we don't allow redirection to anything other + # than http, https and ftp. 
+ + # We are using newer HTTPError with older redirect_internal method + # This older method will get deprecated in 3.3 + + if urlparts.scheme not in ('http', 'https', 'ftp', ''): + raise HTTPError(newurl, errcode, + errmsg + + " Redirection to url '%s' is not allowed." % newurl, + headers, fp) + + return self.open(newurl) + + def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): + """Error 301 -- also relocated (permanently).""" + return self.http_error_302(url, fp, errcode, errmsg, headers, data) + + def http_error_303(self, url, fp, errcode, errmsg, headers, data=None): + """Error 303 -- also relocated (essentially identical to 302).""" + return self.http_error_302(url, fp, errcode, errmsg, headers, data) + + def http_error_307(self, url, fp, errcode, errmsg, headers, data=None): + """Error 307 -- relocated, but turn POST into error.""" + if data is None: + return self.http_error_302(url, fp, errcode, errmsg, headers, data) + else: + return self.http_error_default(url, fp, errcode, errmsg, headers) + + def http_error_401(self, url, fp, errcode, errmsg, headers, data=None, + retry=False): + """Error 401 -- authentication required. 
+ This function supports Basic authentication only.""" + if 'www-authenticate' not in headers: + URLopener.http_error_default(self, url, fp, + errcode, errmsg, headers) + stuff = headers['www-authenticate'] + match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) + if not match: + URLopener.http_error_default(self, url, fp, + errcode, errmsg, headers) + scheme, realm = match.groups() + if scheme.lower() != 'basic': + URLopener.http_error_default(self, url, fp, + errcode, errmsg, headers) + if not retry: + URLopener.http_error_default(self, url, fp, errcode, errmsg, + headers) + name = 'retry_' + self.type + '_basic_auth' + if data is None: + return getattr(self,name)(url, realm) + else: + return getattr(self,name)(url, realm, data) + + def http_error_407(self, url, fp, errcode, errmsg, headers, data=None, + retry=False): + """Error 407 -- proxy authentication required. + This function supports Basic authentication only.""" + if 'proxy-authenticate' not in headers: + URLopener.http_error_default(self, url, fp, + errcode, errmsg, headers) + stuff = headers['proxy-authenticate'] + match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) + if not match: + URLopener.http_error_default(self, url, fp, + errcode, errmsg, headers) + scheme, realm = match.groups() + if scheme.lower() != 'basic': + URLopener.http_error_default(self, url, fp, + errcode, errmsg, headers) + if not retry: + URLopener.http_error_default(self, url, fp, errcode, errmsg, + headers) + name = 'retry_proxy_' + self.type + '_basic_auth' + if data is None: + return getattr(self,name)(url, realm) + else: + return getattr(self,name)(url, realm, data) + + def retry_proxy_http_basic_auth(self, url, realm, data=None): + host, selector = splithost(url) + newurl = 'http://' + host + selector + proxy = self.proxies['http'] + urltype, proxyhost = splittype(proxy) + proxyhost, proxyselector = splithost(proxyhost) + i = proxyhost.find('@') + 1 + proxyhost = proxyhost[i:] + user, passwd = 
self.get_user_passwd(proxyhost, realm, i) + if not (user or passwd): return None + proxyhost = "%s:%s@%s" % (quote(user, safe=''), + quote(passwd, safe=''), proxyhost) + self.proxies['http'] = 'http://' + proxyhost + proxyselector + if data is None: + return self.open(newurl) + else: + return self.open(newurl, data) + + def retry_proxy_https_basic_auth(self, url, realm, data=None): + host, selector = splithost(url) + newurl = 'https://' + host + selector + proxy = self.proxies['https'] + urltype, proxyhost = splittype(proxy) + proxyhost, proxyselector = splithost(proxyhost) + i = proxyhost.find('@') + 1 + proxyhost = proxyhost[i:] + user, passwd = self.get_user_passwd(proxyhost, realm, i) + if not (user or passwd): return None + proxyhost = "%s:%s@%s" % (quote(user, safe=''), + quote(passwd, safe=''), proxyhost) + self.proxies['https'] = 'https://' + proxyhost + proxyselector + if data is None: + return self.open(newurl) + else: + return self.open(newurl, data) + + def retry_http_basic_auth(self, url, realm, data=None): + host, selector = splithost(url) + i = host.find('@') + 1 + host = host[i:] + user, passwd = self.get_user_passwd(host, realm, i) + if not (user or passwd): return None + host = "%s:%s@%s" % (quote(user, safe=''), + quote(passwd, safe=''), host) + newurl = 'http://' + host + selector + if data is None: + return self.open(newurl) + else: + return self.open(newurl, data) + + def retry_https_basic_auth(self, url, realm, data=None): + host, selector = splithost(url) + i = host.find('@') + 1 + host = host[i:] + user, passwd = self.get_user_passwd(host, realm, i) + if not (user or passwd): return None + host = "%s:%s@%s" % (quote(user, safe=''), + quote(passwd, safe=''), host) + newurl = 'https://' + host + selector + if data is None: + return self.open(newurl) + else: + return self.open(newurl, data) + + def get_user_passwd(self, host, realm, clear_cache=0): + key = realm + '@' + host.lower() + if key in self.auth_cache: + if clear_cache: + del 
self.auth_cache[key] + else: + return self.auth_cache[key] + user, passwd = self.prompt_user_passwd(host, realm) + if user or passwd: self.auth_cache[key] = (user, passwd) + return user, passwd + + def prompt_user_passwd(self, host, realm): + """Override this in a GUI environment!""" + import getpass + try: + user = input("Enter username for %s at %s: " % (realm, host)) + passwd = getpass.getpass("Enter password for %s in %s at %s: " % + (user, realm, host)) + return user, passwd + except KeyboardInterrupt: + print() + return None, None + + +# Utility functions + +_localhost = None +def localhost(): + """Return the IP address of the magic hostname 'localhost'.""" + global _localhost + if _localhost is None: + _localhost = socket.gethostbyname('localhost') + return _localhost + +_thishost = None +def thishost(): + """Return the IP addresses of the current host.""" + global _thishost + if _thishost is None: + try: + _thishost = tuple(socket.gethostbyname_ex(socket.gethostname())[2]) + except socket.gaierror: + _thishost = tuple(socket.gethostbyname_ex('localhost')[2]) + return _thishost + +_ftperrors = None +def ftperrors(): + """Return the set of errors raised by the FTP class.""" + global _ftperrors + if _ftperrors is None: + import ftplib + _ftperrors = ftplib.all_errors + return _ftperrors + +_noheaders = None +def noheaders(): + """Return an empty email Message object.""" + global _noheaders + if _noheaders is None: + _noheaders = email.message_from_string("") + return _noheaders + + +# Utility classes + +class ftpwrapper(object): + """Class used by open_ftp() for cache of open FTP connections.""" + + def __init__(self, user, passwd, host, port, dirs, timeout=None, + persistent=True): + self.user = user + self.passwd = passwd + self.host = host + self.port = port + self.dirs = dirs + self.timeout = timeout + self.refcount = 0 + self.keepalive = persistent + self.init() + + def init(self): + import ftplib + self.busy = 0 + self.ftp = ftplib.FTP() + 
self.ftp.connect(self.host, self.port, self.timeout) + self.ftp.login(self.user, self.passwd) + _target = '/'.join(self.dirs) + self.ftp.cwd(_target) + + def retrfile(self, file, type): + import ftplib + self.endtransfer() + if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1 + else: cmd = 'TYPE ' + type; isdir = 0 + try: + self.ftp.voidcmd(cmd) + except ftplib.all_errors: + self.init() + self.ftp.voidcmd(cmd) + conn = None + if file and not isdir: + # Try to retrieve as a file + try: + cmd = 'RETR ' + file + conn, retrlen = self.ftp.ntransfercmd(cmd) + except ftplib.error_perm as reason: + if str(reason)[:3] != '550': + raise_with_traceback(URLError('ftp error: %r' % reason)) + if not conn: + # Set transfer mode to ASCII! + self.ftp.voidcmd('TYPE A') + # Try a directory listing. Verify that directory exists. + if file: + pwd = self.ftp.pwd() + try: + try: + self.ftp.cwd(file) + except ftplib.error_perm as reason: + ### Was: + # raise URLError('ftp error: %r' % reason) from reason + exc = URLError('ftp error: %r' % reason) + exc.__cause__ = reason + raise exc + finally: + self.ftp.cwd(pwd) + cmd = 'LIST ' + file + else: + cmd = 'LIST' + conn, retrlen = self.ftp.ntransfercmd(cmd) + self.busy = 1 + + ftpobj = addclosehook(conn.makefile('rb'), self.file_close) + self.refcount += 1 + conn.close() + # Pass back both a suitably decorated object and a retrieval length + return (ftpobj, retrlen) + + def endtransfer(self): + self.busy = 0 + + def close(self): + self.keepalive = False + if self.refcount <= 0: + self.real_close() + + def file_close(self): + self.endtransfer() + self.refcount -= 1 + if self.refcount <= 0 and not self.keepalive: + self.real_close() + + def real_close(self): + self.endtransfer() + try: + self.ftp.close() + except ftperrors(): + pass + +# Proxy handling +def getproxies_environment(): + """Return a dictionary of scheme -> proxy server URL mappings. + + Scan the environment for variables named _proxy; + this seems to be the standard convention. 
If you need a + different way, you can pass a proxies dictionary to the + [Fancy]URLopener constructor. + + """ + proxies = {} + for name, value in os.environ.items(): + name = name.lower() + if value and name[-6:] == '_proxy': + proxies[name[:-6]] = value + return proxies + +def proxy_bypass_environment(host): + """Test if proxies should not be used for a particular host. + + Checks the environment for a variable named no_proxy, which should + be a list of DNS suffixes separated by commas, or '*' for all hosts. + """ + no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '') + # '*' is special case for always bypass + if no_proxy == '*': + return 1 + # strip port off host + hostonly, port = splitport(host) + # check if the host ends with any of the DNS suffixes + no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')] + for name in no_proxy_list: + if name and (hostonly.endswith(name) or host.endswith(name)): + return 1 + # otherwise, don't bypass + return 0 + + +# This code tests an OSX specific data structure but is testable on all +# platforms +def _proxy_bypass_macosx_sysconf(host, proxy_settings): + """ + Return True iff this host shouldn't be accessed using a proxy + + This function uses the MacOSX framework SystemConfiguration + to fetch the proxy information. + + proxy_settings come from _scproxy._get_proxy_settings or get mocked ie: + { 'exclude_simple': bool, + 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16'] + } + """ + from fnmatch import fnmatch + + hostonly, port = splitport(host) + + def ip2num(ipAddr): + parts = ipAddr.split('.') + parts = list(map(int, parts)) + if len(parts) != 4: + parts = (parts + [0, 0, 0, 0])[:4] + return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3] + + # Check for simple host names: + if '.' 
not in host: + if proxy_settings['exclude_simple']: + return True + + hostIP = None + + for value in proxy_settings.get('exceptions', ()): + # Items in the list are strings like these: *.local, 169.254/16 + if not value: continue + + m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value) + if m is not None: + if hostIP is None: + try: + hostIP = socket.gethostbyname(hostonly) + hostIP = ip2num(hostIP) + except socket.error: + continue + + base = ip2num(m.group(1)) + mask = m.group(2) + if mask is None: + mask = 8 * (m.group(1).count('.') + 1) + else: + mask = int(mask[1:]) + mask = 32 - mask + + if (hostIP >> mask) == (base >> mask): + return True + + elif fnmatch(host, value): + return True + + return False + + +if sys.platform == 'darwin': + from _scproxy import _get_proxy_settings, _get_proxies + + def proxy_bypass_macosx_sysconf(host): + proxy_settings = _get_proxy_settings() + return _proxy_bypass_macosx_sysconf(host, proxy_settings) + + def getproxies_macosx_sysconf(): + """Return a dictionary of scheme -> proxy server URL mappings. + + This function uses the MacOSX framework SystemConfiguration + to fetch the proxy information. + """ + return _get_proxies() + + + + def proxy_bypass(host): + if getproxies_environment(): + return proxy_bypass_environment(host) + else: + return proxy_bypass_macosx_sysconf(host) + + def getproxies(): + return getproxies_environment() or getproxies_macosx_sysconf() + + +elif os.name == 'nt': + def getproxies_registry(): + """Return a dictionary of scheme -> proxy server URL mappings. + + Win32 uses the registry to store proxies. + + """ + proxies = {} + try: + import winreg + except ImportError: + # Std module, so should be around - but you never know! 
+ return proxies + try: + internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER, + r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') + proxyEnable = winreg.QueryValueEx(internetSettings, + 'ProxyEnable')[0] + if proxyEnable: + # Returned as Unicode but problems if not converted to ASCII + proxyServer = str(winreg.QueryValueEx(internetSettings, + 'ProxyServer')[0]) + if '=' in proxyServer: + # Per-protocol settings + for p in proxyServer.split(';'): + protocol, address = p.split('=', 1) + # See if address has a type:// prefix + if not re.match('^([^/:]+)://', address): + address = '%s://%s' % (protocol, address) + proxies[protocol] = address + else: + # Use one setting for all protocols + if proxyServer[:5] == 'http:': + proxies['http'] = proxyServer + else: + proxies['http'] = 'http://%s' % proxyServer + proxies['https'] = 'https://%s' % proxyServer + proxies['ftp'] = 'ftp://%s' % proxyServer + internetSettings.Close() + except (WindowsError, ValueError, TypeError): + # Either registry key not found etc, or the value in an + # unexpected format. + # proxies already set up to be empty so nothing to do + pass + return proxies + + def getproxies(): + """Return a dictionary of scheme -> proxy server URL mappings. + + Returns settings gathered from the environment, if specified, + or the registry. + + """ + return getproxies_environment() or getproxies_registry() + + def proxy_bypass_registry(host): + try: + import winreg + except ImportError: + # Std modules, so should be around - but you never know! 
+ return 0 + try: + internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER, + r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') + proxyEnable = winreg.QueryValueEx(internetSettings, + 'ProxyEnable')[0] + proxyOverride = str(winreg.QueryValueEx(internetSettings, + 'ProxyOverride')[0]) + # ^^^^ Returned as Unicode but problems if not converted to ASCII + except WindowsError: + return 0 + if not proxyEnable or not proxyOverride: + return 0 + # try to make a host list from name and IP address. + rawHost, port = splitport(host) + host = [rawHost] + try: + addr = socket.gethostbyname(rawHost) + if addr != rawHost: + host.append(addr) + except socket.error: + pass + try: + fqdn = socket.getfqdn(rawHost) + if fqdn != rawHost: + host.append(fqdn) + except socket.error: + pass + # make a check value list from the registry entry: replace the + # '' string by the localhost entry and the corresponding + # canonical entry. + proxyOverride = proxyOverride.split(';') + # now check if we match one of the registry values. + for test in proxyOverride: + if test == '': + if '.' not in rawHost: + return 1 + test = test.replace(".", r"\.") # mask dots + test = test.replace("*", r".*") # change glob sequence + test = test.replace("?", r".") # change glob char + for val in host: + if re.match(test, val, re.I): + return 1 + return 0 + + def proxy_bypass(host): + """Return a dictionary of scheme -> proxy server URL mappings. + + Returns settings gathered from the environment, if specified, + or the registry. 
+ + """ + if getproxies_environment(): + return proxy_bypass_environment(host) + else: + return proxy_bypass_registry(host) + +else: + # By default use environment variables + getproxies = getproxies_environment + proxy_bypass = proxy_bypass_environment diff --git a/src/future/backports/urllib/response.py b/src/future/backports/urllib/response.py new file mode 100644 index 00000000..adbf6e5a --- /dev/null +++ b/src/future/backports/urllib/response.py @@ -0,0 +1,103 @@ +"""Response classes used by urllib. + +The base class, addbase, defines a minimal file-like interface, +including read() and readline(). The typical response object is an +addinfourl instance, which defines an info() method that returns +headers and a geturl() method that returns the url. +""" +from __future__ import absolute_import, division, unicode_literals +from future.builtins import object + +class addbase(object): + """Base class for addinfo and addclosehook.""" + + # XXX Add a method to expose the timeout on the underlying socket? + + def __init__(self, fp): + # TODO(jhylton): Is there a better way to delegate using io? + self.fp = fp + self.read = self.fp.read + self.readline = self.fp.readline + # TODO(jhylton): Make sure an object with readlines() is also iterable + if hasattr(self.fp, "readlines"): + self.readlines = self.fp.readlines + if hasattr(self.fp, "fileno"): + self.fileno = self.fp.fileno + else: + self.fileno = lambda: None + + def __iter__(self): + # Assigning `__iter__` to the instance doesn't work as intended + # because the iter builtin does something like `cls.__iter__(obj)` + # and thus fails to find the _bound_ method `obj.__iter__`. + # Returning just `self.fp` works for built-in file objects but + # might not work for general file-like objects. 
+ return iter(self.fp) + + def __repr__(self): + return '<%s at %r whose fp = %r>' % (self.__class__.__name__, + id(self), self.fp) + + def close(self): + if self.fp: + self.fp.close() + self.fp = None + self.read = None + self.readline = None + self.readlines = None + self.fileno = None + self.__iter__ = None + self.__next__ = None + + def __enter__(self): + if self.fp is None: + raise ValueError("I/O operation on closed file") + return self + + def __exit__(self, type, value, traceback): + self.close() + +class addclosehook(addbase): + """Class to add a close hook to an open file.""" + + def __init__(self, fp, closehook, *hookargs): + addbase.__init__(self, fp) + self.closehook = closehook + self.hookargs = hookargs + + def close(self): + if self.closehook: + self.closehook(*self.hookargs) + self.closehook = None + self.hookargs = None + addbase.close(self) + +class addinfo(addbase): + """class to add an info() method to an open file.""" + + def __init__(self, fp, headers): + addbase.__init__(self, fp) + self.headers = headers + + def info(self): + return self.headers + +class addinfourl(addbase): + """class to add info() and geturl() methods to an open file.""" + + def __init__(self, fp, headers, url, code=None): + addbase.__init__(self, fp) + self.headers = headers + self.url = url + self.code = code + + def info(self): + return self.headers + + def getcode(self): + return self.code + + def geturl(self): + return self.url + +del absolute_import, division, unicode_literals, object diff --git a/src/future/backports/urllib/robotparser.py b/src/future/backports/urllib/robotparser.py new file mode 100644 index 00000000..a0f36511 --- /dev/null +++ b/src/future/backports/urllib/robotparser.py @@ -0,0 +1,211 @@ +from __future__ import absolute_import, division, unicode_literals +from future.builtins import str +""" robotparser.py + + Copyright (C) 2000 Bastian Kleineidam + + You can choose between two licenses when using this package: + 1) GNU GPLv2 + 2) PSF license 
class RobotFileParser(object):
    """ This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    """

    def __init__(self, url=''):
        # Parsed Entry records in file order; the "*" catch-all entry is
        # kept separately in default_entry and consulted last.
        self.entries = []
        self.default_entry = None
        # Blanket answers forced by the HTTP status of the robots.txt fetch.
        self.disallow_all = False
        self.allow_all = False
        self.set_url(url)
        self.last_checked = 0

    def mtime(self):
        """Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        """
        return self.last_checked

    def modified(self):
        """Sets the time the robots.txt file was last fetched to the
        current time.

        """
        import time
        self.last_checked = time.time()

    def set_url(self, url):
        """Sets the URL referring to a robots.txt file."""
        self.url = url
        # urlparse()[1:3] is (netloc, path).
        self.host, self.path = urllib.parse.urlparse(url)[1:3]

    def read(self):
        """Reads the robots.txt URL and feeds it to the parser."""
        try:
            f = urllib.request.urlopen(self.url)
        except urllib.error.HTTPError as err:
            # 401/403 mean "robots.txt is protected": disallow everything.
            if err.code in (401, 403):
                self.disallow_all = True
            # Any other 4xx/5xx (e.g. 404) means "no robots.txt": allow all.
            elif err.code >= 400:
                self.allow_all = True
        else:
            raw = f.read()
            self.parse(raw.decode("utf-8").splitlines())

    def _add_entry(self, entry):
        # Route a finished record either to default_entry or entries.
        if "*" in entry.useragents:
            # the default entry is considered last
            if self.default_entry is None:
                # the first default entry wins
                self.default_entry = entry
        else:
            self.entries.append(entry)

    def parse(self, lines):
        """Parse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        """
        # states:
        #   0: start state
        #   1: saw user-agent line
        #   2: saw an allow or disallow line
        state = 0
        entry = Entry()

        for line in lines:
            if not line:
                # A blank line ends the current record.
                if state == 1:
                    # user-agent line(s) with no rules: discard the record.
                    entry = Entry()
                    state = 0
                elif state == 2:
                    self._add_entry(entry)
                    entry = Entry()
                    state = 0
            # remove optional comment and strip line
            i = line.find('#')
            if i >= 0:
                line = line[:i]
            line = line.strip()
            if not line:
                continue
            line = line.split(':', 1)
            if len(line) == 2:
                line[0] = line[0].strip().lower()
                line[1] = urllib.parse.unquote(line[1].strip())
                if line[0] == "user-agent":
                    if state == 2:
                        # A user-agent line after rules starts a new record.
                        self._add_entry(entry)
                        entry = Entry()
                    entry.useragents.append(line[1])
                    state = 1
                elif line[0] == "disallow":
                    # Rules are only valid after at least one user-agent line.
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], False))
                        state = 2
                elif line[0] == "allow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], True))
                        state = 2
        if state == 2:
            # Flush the final record (file may not end with a blank line).
            self._add_entry(entry)


    def can_fetch(self, useragent, url):
        """using the parsed robots.txt decide if useragent can fetch url"""
        if self.disallow_all:
            return False
        if self.allow_all:
            return True
        # search for given user agent matches
        # the first match counts
        # Normalize the URL to its path+params+query+fragment, re-quoted,
        # so it compares against the quoted RuleLine paths.
        parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url))
        url = urllib.parse.urlunparse(('','',parsed_url.path,
            parsed_url.params,parsed_url.query, parsed_url.fragment))
        url = urllib.parse.quote(url)
        if not url:
            url = "/"
        for entry in self.entries:
            if entry.applies_to(useragent):
                return entry.allowance(url)
        # try the default entry last
        if self.default_entry:
            return self.default_entry.allowance(url)
        # agent not found ==> access granted
        return True

    def __str__(self):
        # NOTE(review): default_entry is not included in the rendered text;
        # this matches the Python 3.3 behaviour this module backports.
        return ''.join([str(entry) + "\n" for entry in self.entries])


class RuleLine(object):
    """A rule line is a single "Allow:" (allowance==True) or "Disallow:"
    (allowance==False) followed by a path."""
    def __init__(self, path, allowance):
        if path == '' and not allowance:
            # an empty value means allow all
            allowance = True
        self.path = urllib.parse.quote(path)
        self.allowance = allowance

    def applies_to(self, filename):
        # "*" matches everything; otherwise match by path prefix.
        return self.path == "*" or filename.startswith(self.path)

    def __str__(self):
        return (self.allowance and "Allow" or "Disallow") + ": " + self.path


class Entry(object):
    """An entry has one or more user-agents and zero or more rulelines"""
    def __init__(self):
        self.useragents = []
        self.rulelines = []

    def __str__(self):
        ret = []
        for agent in self.useragents:
            ret.extend(["User-agent: ", agent, "\n"])
        for line in self.rulelines:
            ret.extend([str(line), "\n"])
        return ''.join(ret)

    def applies_to(self, useragent):
        """check if this entry applies to the specified agent"""
        # split the name token and make it lower case
        useragent = useragent.split("/")[0].lower()
        for agent in self.useragents:
            if agent == '*':
                # we have the catch-all agent
                return True
            agent = agent.lower()
            # Substring match: "bot" in the entry matches agent "mybot".
            if agent in useragent:
                return True
        return False

    def allowance(self, filename):
        """Preconditions:
        - our agent applies to this entry
        - filename is URL decoded"""
        # First matching rule wins; no matching rule means allowed.
        for line in self.rulelines:
            if line.applies_to(filename):
                return line.allowance
        return True
+# +# Notes: +# this version is designed to work with Python 2.1 or newer. +# +# History: +# 1999-01-14 fl Created +# 1999-01-15 fl Changed dateTime to use localtime +# 1999-01-16 fl Added Binary/base64 element, default to RPC2 service +# 1999-01-19 fl Fixed array data element (from Skip Montanaro) +# 1999-01-21 fl Fixed dateTime constructor, etc. +# 1999-02-02 fl Added fault handling, handle empty sequences, etc. +# 1999-02-10 fl Fixed problem with empty responses (from Skip Montanaro) +# 1999-06-20 fl Speed improvements, pluggable parsers/transports (0.9.8) +# 2000-11-28 fl Changed boolean to check the truth value of its argument +# 2001-02-24 fl Added encoding/Unicode/SafeTransport patches +# 2001-02-26 fl Added compare support to wrappers (0.9.9/1.0b1) +# 2001-03-28 fl Make sure response tuple is a singleton +# 2001-03-29 fl Don't require empty params element (from Nicholas Riley) +# 2001-06-10 fl Folded in _xmlrpclib accelerator support (1.0b2) +# 2001-08-20 fl Base xmlrpclib.Error on built-in Exception (from Paul Prescod) +# 2001-09-03 fl Allow Transport subclass to override getparser +# 2001-09-10 fl Lazy import of urllib, cgi, xmllib (20x import speedup) +# 2001-10-01 fl Remove containers from memo cache when done with them +# 2001-10-01 fl Use faster escape method (80% dumps speedup) +# 2001-10-02 fl More dumps microtuning +# 2001-10-04 fl Make sure import expat gets a parser (from Guido van Rossum) +# 2001-10-10 sm Allow long ints to be passed as ints if they don't overflow +# 2001-10-17 sm Test for int and long overflow (allows use on 64-bit systems) +# 2001-11-12 fl Use repr() to marshal doubles (from Paul Felix) +# 2002-03-17 fl Avoid buffered read when possible (from James Rucker) +# 2002-04-07 fl Added pythondoc comments +# 2002-04-16 fl Added __str__ methods to datetime/binary wrappers +# 2002-05-15 fl Added error constants (from Andrew Kuchling) +# 2002-06-27 fl Merged with Python CVS version +# 2002-10-22 fl Added basic authentication (based on 
code from Phillip Eby) +# 2003-01-22 sm Add support for the bool type +# 2003-02-27 gvr Remove apply calls +# 2003-04-24 sm Use cStringIO if available +# 2003-04-25 ak Add support for nil +# 2003-06-15 gn Add support for time.struct_time +# 2003-07-12 gp Correct marshalling of Faults +# 2003-10-31 mvl Add multicall support +# 2004-08-20 mvl Bump minimum supported Python version to 2.1 +# +# Copyright (c) 1999-2002 by Secret Labs AB. +# Copyright (c) 1999-2002 by Fredrik Lundh. +# +# info@pythonware.com +# http://www.pythonware.com +# +# -------------------------------------------------------------------- +# The XML-RPC client interface is +# +# Copyright (c) 1999-2002 by Secret Labs AB +# Copyright (c) 1999-2002 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. 
+# -------------------------------------------------------------------- + +""" +Ported using Python-Future from the Python 3.3 standard library. + +An XML-RPC client interface for Python. + +The marshalling and response parser code can also be used to +implement XML-RPC servers. + +Exported exceptions: + + Error Base class for client errors + ProtocolError Indicates an HTTP protocol error + ResponseError Indicates a broken response package + Fault Indicates an XML-RPC fault package + +Exported classes: + + ServerProxy Represents a logical connection to an XML-RPC server + + MultiCall Executor of boxcared xmlrpc requests + DateTime dateTime wrapper for an ISO 8601 string or time tuple or + localtime integer value to generate a "dateTime.iso8601" + XML-RPC value + Binary binary data wrapper + + Marshaller Generate an XML-RPC params chunk from a Python data structure + Unmarshaller Unmarshal an XML-RPC response from incoming XML event message + Transport Handles an HTTP transaction to an XML-RPC server + SafeTransport Handles an HTTPS transaction to an XML-RPC server + +Exported constants: + + (none) + +Exported functions: + + getparser Create instance of the fastest available parser & attach + to an unmarshalling object + dumps Convert an argument tuple or a Fault instance to an XML-RPC + request (or response, if the methodresponse option is used). + loads Convert an XML-RPC packet to unmarshalled data plus a method + name (None if not present). 
# --------------------------------------------------------------------
# Internal stuff

def escape(s):
    """Escape the XML-special characters ``&``, ``<`` and ``>`` in *s*.

    Returns the escaped copy; *s* itself is not modified.
    """
    # "&" must be replaced first so the entities added below are not
    # themselves re-escaped.  (The entity replacement strings had been
    # stripped from this text and are restored here per CPython's
    # xmlrpc.client.)
    s = s.replace("&", "&amp;")
    s = s.replace("<", "&lt;")
    return s.replace(">", "&gt;",)

# used in User-Agent header sent
__version__ = sys.version[:3]

# xmlrpc integer limits
MAXINT =  2**31-1
MININT = -2**31

# --------------------------------------------------------------------
# Error constants (from Dan Libby's specification at
# http://xmlrpc-epi.sourceforge.net/specs/rfc.fault_codes.php)

# Ranges of errors
PARSE_ERROR       = -32700
SERVER_ERROR      = -32600
APPLICATION_ERROR = -32500
SYSTEM_ERROR      = -32400
TRANSPORT_ERROR   = -32300

# Specific errors
NOT_WELLFORMED_ERROR  = -32700
UNSUPPORTED_ENCODING  = -32701
INVALID_ENCODING_CHAR = -32702
INVALID_XMLRPC        = -32600
METHOD_NOT_FOUND      = -32601
INVALID_METHOD_PARAMS = -32602
INTERNAL_ERROR        = -32603

# --------------------------------------------------------------------
# Exceptions

##
# Base class for all kinds of client-side errors.

class Error(Exception):
    """Base class for client errors."""
    def __str__(self):
        return repr(self)

##
# Indicates an HTTP-level protocol error.  This is raised by the HTTP
# transport layer, if the server returns an error code other than 200
# (OK).
#
# @param url The target URL.
# @param errcode The HTTP error code.
# @param errmsg The HTTP error message.
# @param headers The HTTP header dictionary.

class ProtocolError(Error):
    """Indicates an HTTP protocol error."""
    def __init__(self, url, errcode, errmsg, headers):
        Error.__init__(self)
        self.url = url
        self.errcode = errcode
        self.errmsg = errmsg
        self.headers = headers
    def __repr__(self):
        # Format string restored (the "<ProtocolError ...>" literal had
        # been stripped from this text).
        return (
            "<ProtocolError for %s: %s %s>" %
            (self.url, self.errcode, self.errmsg)
            )

##
# Indicates a broken XML-RPC response package. This exception is
# raised by the unmarshalling layer, if the XML-RPC response is
# malformed.

class ResponseError(Error):
    """Indicates a broken response package."""
    pass

##
# Indicates an XML-RPC fault response package. This exception is
# raised by the unmarshalling layer, if the XML-RPC response contains
# a fault string. This exception can also be used as a class, to
# generate a fault XML-RPC message.
#
# @param faultCode The XML-RPC fault code.
# @param faultString The XML-RPC fault string.

class Fault(Error):
    """Indicates an XML-RPC fault package."""
    def __init__(self, faultCode, faultString, **extra):
        Error.__init__(self)
        self.faultCode = faultCode
        self.faultString = faultString
    def __repr__(self):
        # Format string restored ("<Fault %s: %s>" had been stripped).
        return "<Fault %s: %s>" % (ensure_new_type(self.faultCode),
                                   ensure_new_type(self.faultString))

# --------------------------------------------------------------------
# Special values

##
# Backwards compatibility

boolean = Boolean = bool

##
# Wrapper for XML-RPC DateTime values.  This converts a time value to
# the format used by XML-RPC.
#

    +# The value can be given as a datetime object, as a string in the +# format "yyyymmddThh:mm:ss", as a 9-item time tuple (as returned by +# time.localtime()), or an integer value (as returned by time.time()). +# The wrapper uses time.localtime() to convert an integer to a time +# tuple. +# +# @param value The time, given as a datetime object, an ISO 8601 string, +# a time tuple, or an integer time value. + + +### For Python-Future: +def _iso8601_format(value): + return "%04d%02d%02dT%02d:%02d:%02d" % ( + value.year, value.month, value.day, + value.hour, value.minute, value.second) +### +# Issue #13305: different format codes across platforms +# _day0 = datetime(1, 1, 1) +# if _day0.strftime('%Y') == '0001': # Mac OS X +# def _iso8601_format(value): +# return value.strftime("%Y%m%dT%H:%M:%S") +# elif _day0.strftime('%4Y') == '0001': # Linux +# def _iso8601_format(value): +# return value.strftime("%4Y%m%dT%H:%M:%S") +# else: +# def _iso8601_format(value): +# return value.strftime("%Y%m%dT%H:%M:%S").zfill(17) +# del _day0 + + +def _strftime(value): + if isinstance(value, datetime): + return _iso8601_format(value) + + if not isinstance(value, (tuple, time.struct_time)): + if value == 0: + value = time.time() + value = time.localtime(value) + + return "%04d%02d%02dT%02d:%02d:%02d" % value[:6] + +class DateTime(object): + """DateTime wrapper for an ISO 8601 string or time tuple or + localtime integer value to generate 'dateTime.iso8601' XML-RPC + value. 
+ """ + + def __init__(self, value=0): + if isinstance(value, str): + self.value = value + else: + self.value = _strftime(value) + + def make_comparable(self, other): + if isinstance(other, DateTime): + s = self.value + o = other.value + elif isinstance(other, datetime): + s = self.value + o = _iso8601_format(other) + elif isinstance(other, str): + s = self.value + o = other + elif hasattr(other, "timetuple"): + s = self.timetuple() + o = other.timetuple() + else: + otype = (hasattr(other, "__class__") + and other.__class__.__name__ + or type(other)) + raise TypeError("Can't compare %s and %s" % + (self.__class__.__name__, otype)) + return s, o + + def __lt__(self, other): + s, o = self.make_comparable(other) + return s < o + + def __le__(self, other): + s, o = self.make_comparable(other) + return s <= o + + def __gt__(self, other): + s, o = self.make_comparable(other) + return s > o + + def __ge__(self, other): + s, o = self.make_comparable(other) + return s >= o + + def __eq__(self, other): + s, o = self.make_comparable(other) + return s == o + + def __ne__(self, other): + s, o = self.make_comparable(other) + return s != o + + def timetuple(self): + return time.strptime(self.value, "%Y%m%dT%H:%M:%S") + + ## + # Get date/time value. + # + # @return Date/time value, as an ISO 8601 string. + + def __str__(self): + return self.value + + def __repr__(self): + return "" % (ensure_new_type(self.value), id(self)) + + def decode(self, data): + self.value = str(data).strip() + + def encode(self, out): + out.write("") + out.write(self.value) + out.write("\n") + +def _datetime(data): + # decode xml element contents into a DateTime structure. + value = DateTime() + value.decode(data) + return value + +def _datetime_type(data): + return datetime.strptime(data, "%Y%m%dT%H:%M:%S") + +## +# Wrapper for binary data. This can be used to transport any kind +# of binary data over XML-RPC, using BASE64 encoding. +# +# @param data An 8-bit string containing arbitrary data. 
+ +class Binary(object): + """Wrapper for binary data.""" + + def __init__(self, data=None): + if data is None: + data = b"" + else: + if not isinstance(data, (bytes, bytearray)): + raise TypeError("expected bytes or bytearray, not %s" % + data.__class__.__name__) + data = bytes(data) # Make a copy of the bytes! + self.data = data + + ## + # Get buffer contents. + # + # @return Buffer contents, as an 8-bit string. + + def __str__(self): + return str(self.data, "latin-1") # XXX encoding?! + + def __eq__(self, other): + if isinstance(other, Binary): + other = other.data + return self.data == other + + def __ne__(self, other): + if isinstance(other, Binary): + other = other.data + return self.data != other + + def decode(self, data): + self.data = base64.decodebytes(data) + + def encode(self, out): + out.write("\n") + encoded = base64.encodebytes(self.data) + out.write(encoded.decode('ascii')) + out.write("\n") + +def _binary(data): + # decode xml element contents into a Binary structure + value = Binary() + value.decode(data) + return value + +WRAPPERS = (DateTime, Binary) + +# -------------------------------------------------------------------- +# XML parsers + +class ExpatParser(object): + # fast expat parser for Python 2.0 and later. + def __init__(self, target): + self._parser = parser = expat.ParserCreate(None, None) + self._target = target + parser.StartElementHandler = target.start + parser.EndElementHandler = target.end + parser.CharacterDataHandler = target.data + encoding = None + target.xml(encoding, None) + + def feed(self, data): + self._parser.Parse(data, 0) + + def close(self): + self._parser.Parse("", 1) # end of data + del self._target, self._parser # get rid of circular references + +# -------------------------------------------------------------------- +# XML-RPC marshalling and unmarshalling code + +## +# XML-RPC marshaller. +# +# @param encoding Default encoding for 8-bit strings. The default +# value is None (interpreted as UTF-8). 
# @see dumps

class Marshaller(object):
    """Generate an XML-RPC params chunk from a Python data structure.

    Create a Marshaller instance for each set of parameters, and use
    the "dumps" method to convert your data (represented as a tuple)
    to an XML-RPC params chunk.  To write a fault response, pass a
    Fault instance instead.  You may prefer to use the "dumps" module
    function for this purpose.
    """

    # by the way, if you don't understand what's going on in here,
    # that's perfectly ok.

    def __init__(self, encoding=None, allow_none=False):
        # memo guards against marshalling recursive containers.
        self.memo = {}
        self.data = None
        self.encoding = encoding
        self.allow_none = allow_none

    # Maps Python types to the dump_* method that serializes them.
    dispatch = {}

    def dumps(self, values):
        """Serialize *values* (a tuple of parameters, or a Fault) to an
        XML-RPC <params>/<fault> chunk and return it as a string.

        NOTE: all XML element literals in this class were restored; the
        previous text had every "<tag>" string stripped out.
        """
        out = []
        write = out.append
        dump = self.__dump
        if isinstance(values, Fault):
            # fault instance
            write("<fault>\n")
            dump({'faultCode': values.faultCode,
                  'faultString': values.faultString},
                 write)
            write("</fault>\n")
        else:
            # parameter block
            # FIXME: the xml-rpc specification allows us to leave out
            # the entire <params> block if there are no parameters.
            # however, changing this may break older code (including
            # old versions of xmlrpclib.py), so this is better left as
            # is for now.  See @XMLRPC3 for more information. /F
            write("<params>\n")
            for v in values:
                write("<param>\n")
                dump(v, write)
                write("</param>\n")
            write("</params>\n")
        result = "".join(out)
        return str(result)

    def __dump(self, value, write):
        # Dispatch on the value's type; fall back to instance marshalling.
        try:
            f = self.dispatch[type(ensure_new_type(value))]
        except KeyError:
            # check if this object can be marshalled as a structure
            if not hasattr(value, '__dict__'):
                raise TypeError("cannot marshal %s objects" % type(value))
            # check if this class is a sub-class of a basic type,
            # because we don't know how to marshal these types
            # (e.g. a string sub-class)
            for type_ in type(value).__mro__:
                if type_ in self.dispatch.keys():
                    raise TypeError("cannot marshal %s objects" % type(value))
            # XXX(twouters): using "_arbitrary_instance" as key as a quick-fix
            # for the p3yk merge, this should probably be fixed more neatly.
            f = self.dispatch["_arbitrary_instance"]
        f(self, value, write)

    def dump_nil (self, value, write):
        if not self.allow_none:
            raise TypeError("cannot marshal None unless allow_none is enabled")
        write("<value><nil/></value>")
    dispatch[type(None)] = dump_nil

    def dump_bool(self, value, write):
        write("<value><boolean>")
        write(value and "1" or "0")
        write("</boolean></value>\n")
    dispatch[bool] = dump_bool

    def dump_long(self, value, write):
        if value > MAXINT or value < MININT:
            raise OverflowError("long int exceeds XML-RPC limits")
        write("<value><int>")
        write(str(int(value)))
        write("</int></value>\n")
    dispatch[int] = dump_long

    # backward compatible
    dump_int = dump_long

    def dump_double(self, value, write):
        write("<value><double>")
        write(repr(ensure_new_type(value)))
        write("</double></value>\n")
    dispatch[float] = dump_double

    def dump_unicode(self, value, write, escape=escape):
        write("<value><string>")
        write(escape(value))
        write("</string></value>\n")
    dispatch[str] = dump_unicode

    def dump_bytes(self, value, write):
        write("<value><base64>\n")
        encoded = base64.encodebytes(value)
        write(encoded.decode('ascii'))
        write("</base64></value>\n")
    dispatch[bytes] = dump_bytes
    dispatch[bytearray] = dump_bytes

    def dump_array(self, value, write):
        i = id(value)
        if i in self.memo:
            raise TypeError("cannot marshal recursive sequences")
        self.memo[i] = None
        dump = self.__dump
        write("<value><array><data>\n")
        for v in value:
            dump(v, write)
        write("</data></array></value>\n")
        del self.memo[i]
    dispatch[tuple] = dump_array
    dispatch[list] = dump_array

    def dump_struct(self, value, write, escape=escape):
        i = id(value)
        if i in self.memo:
            raise TypeError("cannot marshal recursive dictionaries")
        self.memo[i] = None
        dump = self.__dump
        write("<value><struct>\n")
        for k, v in value.items():
            write("<member>\n")
            if not isinstance(k, str):
                raise TypeError("dictionary key must be string")
            write("<name>%s</name>\n" % escape(k))
            dump(v, write)
            write("</member>\n")
        write("</struct></value>\n")
        del self.memo[i]
    dispatch[dict] = dump_struct

    def dump_datetime(self, value, write):
        write("<value><dateTime.iso8601>")
        write(_strftime(value))
        write("</dateTime.iso8601></value>\n")
    dispatch[datetime] = dump_datetime

    def dump_instance(self, value, write):
        # check for special wrappers
        if value.__class__ in WRAPPERS:
            self.write = write
            value.encode(self)
            del self.write
        else:
            # store instance attributes as a struct (really?)
            self.dump_struct(value.__dict__, write)
    dispatch[DateTime] = dump_instance
    dispatch[Binary] = dump_instance
    # XXX(twouters): using "_arbitrary_instance" as key as a quick-fix
    # for the p3yk merge, this should probably be fixed more neatly.
    dispatch["_arbitrary_instance"] = dump_instance

##
# XML-RPC unmarshaller.
#
# @see loads

class Unmarshaller(object):
    """Unmarshal an XML-RPC response, based on incoming XML event
    messages (start, data, end).  Call close() to get the resulting
    data structure.

    Note that this reader is fairly tolerant, and gladly accepts bogus
    XML-RPC data without complaining (but not bogus XML).
    """

    # and again, if you don't understand what's going on in here,
    # that's perfectly ok.

    def __init__(self, use_datetime=False, use_builtin_types=False):
        self._type = None
        self._stack = []
        self._marks = []
        self._data = []
        self._methodname = None
        self._encoding = "utf-8"
        self.append = self._stack.append
        self._use_datetime = use_builtin_types or use_datetime
        self._use_bytes = use_builtin_types

    def close(self):
        # return response tuple and target method
        if self._type is None or self._marks:
            raise ResponseError()
        if self._type == "fault":
            raise Fault(**self._stack[0])
        return tuple(self._stack)

    def getmethodname(self):
        return self._methodname

    #
    # event handlers

    def xml(self, encoding, standalone):
        self._encoding = encoding
        # FIXME: assert standalone == 1 ???

    def start(self, tag, attrs):
        # prepare to handle this element
        if tag == "array" or tag == "struct":
            # Remember where this container's children begin on the stack.
            self._marks.append(len(self._stack))
        self._data = []
        self._value = (tag == "value")

    def data(self, text):
        self._data.append(text)

    def end(self, tag):
        # call the appropriate end tag handler
        try:
            f = self.dispatch[tag]
        except KeyError:
            pass # unknown tag ?
        else:
            return f(self, "".join(self._data))

    #
    # accelerator support

    def end_dispatch(self, tag, data):
        # dispatch data
        try:
            f = self.dispatch[tag]
        except KeyError:
            pass # unknown tag ?
        else:
            return f(self, data)

    #
    # element decoders

    dispatch = {}

    def end_nil (self, data):
        self.append(None)
        self._value = 0
    dispatch["nil"] = end_nil

    def end_boolean(self, data):
        if data == "0":
            self.append(False)
        elif data == "1":
            self.append(True)
        else:
            raise TypeError("bad boolean value")
        self._value = 0
    dispatch["boolean"] = end_boolean

    def end_int(self, data):
        self.append(int(data))
        self._value = 0
    dispatch["i4"] = end_int
    dispatch["i8"] = end_int
    dispatch["int"] = end_int

    def end_double(self, data):
        self.append(float(data))
        self._value = 0
    dispatch["double"] = end_double

    def end_string(self, data):
        # NOTE(review): decode() on the joined text only runs when the
        # parser reported a real encoding; ExpatParser passes None, so
        # this branch is normally skipped — confirm before relying on it.
        if self._encoding:
            data = data.decode(self._encoding)
        self.append(data)
        self._value = 0
    dispatch["string"] = end_string
    dispatch["name"] = end_string # struct keys are always strings

    def end_array(self, data):
        mark = self._marks.pop()
        # map arrays to Python lists
        self._stack[mark:] = [self._stack[mark:]]
        self._value = 0
    dispatch["array"] = end_array

    def end_struct(self, data):
        mark = self._marks.pop()
        # map structs to Python dictionaries
        dict = {}
        items = self._stack[mark:]
        for i in range(0, len(items), 2):
            dict[items[i]] = items[i+1]
        self._stack[mark:] = [dict]
        self._value = 0
    dispatch["struct"] = end_struct

    def end_base64(self, data):
        value = Binary()
        value.decode(data.encode("ascii"))
        if self._use_bytes:
            value = value.data
        self.append(value)
        self._value = 0
    dispatch["base64"] = end_base64

    def end_dateTime(self, data):
        value = DateTime()
        value.decode(data)
        if self._use_datetime:
            value = _datetime_type(data)
        self.append(value)
    dispatch["dateTime.iso8601"] = end_dateTime

    def end_value(self, data):
        # if we stumble upon a value element with no internal
        # elements, treat it as a string element
        if self._value:
            self.end_string(data)
    dispatch["value"] = end_value

    def end_params(self, data):
        self._type = "params"
    dispatch["params"] = end_params

    def end_fault(self, data):
        self._type = "fault"
    dispatch["fault"] = end_fault

    def end_methodName(self, data):
        if self._encoding:
            data = data.decode(self._encoding)
        self._methodname = data
        self._type = "methodName" # no params
    dispatch["methodName"] = end_methodName

## Multicall support
#

class _MultiCallMethod(object):
    # some lesser magic to store calls made to a MultiCall object
    # for batch execution
    def __init__(self, call_list, name):
        self.__call_list = call_list
        self.__name = name
    def __getattr__(self, name):
        # Build dotted method names: server.a.b -> "a.b".
        return _MultiCallMethod(self.__call_list, "%s.%s" % (self.__name, name))
    def __call__(self, *args):
        self.__call_list.append((self.__name, args))

class MultiCallIterator(object):
    """Iterates over the results of a multicall. Exceptions are
    raised in response to xmlrpc faults."""

    def __init__(self, results):
        self.results = results

    def __getitem__(self, i):
        item = self.results[i]
        # BUGFIX: was `isinstance(type(item), dict)`, which is always
        # False (type(item) is a type object, never a dict), so fault
        # dicts fell through to the ValueError branch instead of
        # raising the Fault they describe.
        if isinstance(item, dict):
            raise Fault(item['faultCode'], item['faultString'])
        elif type(item) == type([]):
            return item[0]
        else:
            raise ValueError("unexpected type in multicall result")

class MultiCall(object):
    """server -> a object used to boxcar method calls

    server should be a ServerProxy object.

    Methods can be added to the MultiCall using normal
    method call syntax e.g.:

    multicall = MultiCall(server_proxy)
    multicall.add(2,3)
    multicall.get_address("Guido")

    To execute the multicall, call the MultiCall object e.g.:

    add_result, address = multicall()
    """

    def __init__(self, server):
        self.__server = server
        self.__call_list = []

    def __repr__(self):
        # Format string restored ("<MultiCall at %x>" had been stripped).
        return "<MultiCall at %x>" % id(self)

    __str__ = __repr__

    def __getattr__(self, name):
        return _MultiCallMethod(self.__call_list, name)

    def __call__(self):
        marshalled_list = []
        for name, args in self.__call_list:
            marshalled_list.append({'methodName' : name, 'params' : args})

        return MultiCallIterator(self.__server.system.multicall(marshalled_list))

# --------------------------------------------------------------------
# convenience functions

FastMarshaller = FastParser = FastUnmarshaller = None

##
# Create a parser object, and connect it to an unmarshalling instance.
# This function picks the fastest available XML parser.
#
# return A (parser, unmarshaller) tuple.

def getparser(use_datetime=False, use_builtin_types=False):
    """getparser() -> parser, unmarshaller

    Create an instance of the fastest available parser, and attach it
    to an unmarshalling object.  Return both objects.
    """
    if FastParser and FastUnmarshaller:
        if use_builtin_types:
            mkdatetime = _datetime_type
            mkbytes = base64.decodebytes
        elif use_datetime:
            mkdatetime = _datetime_type
            mkbytes = _binary
        else:
            mkdatetime = _datetime
            mkbytes = _binary
        target = FastUnmarshaller(True, False, mkbytes, mkdatetime, Fault)
        parser = FastParser(target)
    else:
        target = Unmarshaller(use_datetime=use_datetime,
                              use_builtin_types=use_builtin_types)
        if FastParser:
            parser = FastParser(target)
        else:
            parser = ExpatParser(target)
    return parser, target

##
# Convert a Python tuple or a Fault instance to an XML-RPC packet.
+# +# @def dumps(params, **options) +# @param params A tuple or Fault instance. +# @keyparam methodname If given, create a methodCall request for +# this method name. +# @keyparam methodresponse If given, create a methodResponse packet. +# If used with a tuple, the tuple must be a singleton (that is, +# it must contain exactly one element). +# @keyparam encoding The packet encoding. +# @return A string containing marshalled data. + +def dumps(params, methodname=None, methodresponse=None, encoding=None, + allow_none=False): + """data [,options] -> marshalled data + + Convert an argument tuple or a Fault instance to an XML-RPC + request (or response, if the methodresponse option is used). + + In addition to the data object, the following options can be given + as keyword arguments: + + methodname: the method name for a methodCall packet + + methodresponse: true to create a methodResponse packet. + If this option is used with a tuple, the tuple must be + a singleton (i.e. it can contain only one element). + + encoding: the packet encoding (default is UTF-8) + + All byte strings in the data structure are assumed to use the + packet encoding. Unicode strings are automatically converted, + where necessary. 
+ """ + + assert isinstance(params, (tuple, Fault)), "argument must be tuple or Fault instance" + if isinstance(params, Fault): + methodresponse = 1 + elif methodresponse and isinstance(params, tuple): + assert len(params) == 1, "response tuple must be a singleton" + + if not encoding: + encoding = "utf-8" + + if FastMarshaller: + m = FastMarshaller(encoding) + else: + m = Marshaller(encoding, allow_none) + + data = m.dumps(params) + + if encoding != "utf-8": + xmlheader = "\n" % str(encoding) + else: + xmlheader = "\n" # utf-8 is default + + # standard XML-RPC wrappings + if methodname: + # a method call + if not isinstance(methodname, str): + methodname = methodname.encode(encoding) + data = ( + xmlheader, + "\n" + "", methodname, "\n", + data, + "\n" + ) + elif methodresponse: + # a method response, or a fault structure + data = ( + xmlheader, + "\n", + data, + "\n" + ) + else: + return data # return as is + return str("").join(data) + +## +# Convert an XML-RPC packet to a Python object. If the XML-RPC packet +# represents a fault condition, this function raises a Fault exception. +# +# @param data An XML-RPC packet, given as an 8-bit string. +# @return A tuple containing the unpacked data, and the method name +# (None if not present). +# @see Fault + +def loads(data, use_datetime=False, use_builtin_types=False): + """data -> unmarshalled data, method name + + Convert an XML-RPC packet to unmarshalled data plus a method + name (None if not present). + + If the XML-RPC packet represents a fault condition, this function + raises a Fault exception. 
+ """ + p, u = getparser(use_datetime=use_datetime, use_builtin_types=use_builtin_types) + p.feed(data) + p.close() + return u.close(), u.getmethodname() + +## +# Encode a string using the gzip content encoding such as specified by the +# Content-Encoding: gzip +# in the HTTP header, as described in RFC 1952 +# +# @param data the unencoded data +# @return the encoded data + +def gzip_encode(data): + """data -> gzip encoded data + + Encode data using the gzip content encoding as described in RFC 1952 + """ + if not gzip: + raise NotImplementedError + f = BytesIO() + gzf = gzip.GzipFile(mode="wb", fileobj=f, compresslevel=1) + gzf.write(data) + gzf.close() + encoded = f.getvalue() + f.close() + return encoded + +## +# Decode a string using the gzip content encoding such as specified by the +# Content-Encoding: gzip +# in the HTTP header, as described in RFC 1952 +# +# @param data The encoded data +# @return the unencoded data +# @raises ValueError if data is not correctly coded. + +def gzip_decode(data): + """gzip encoded data -> unencoded data + + Decode data using the gzip content encoding as described in RFC 1952 + """ + if not gzip: + raise NotImplementedError + f = BytesIO(data) + gzf = gzip.GzipFile(mode="rb", fileobj=f) + try: + decoded = gzf.read() + except IOError: + raise ValueError("invalid data") + f.close() + gzf.close() + return decoded + +## +# Return a decoded file-like object for the gzip encoding +# as described in RFC 1952. +# +# @param response A stream supporting a read() method +# @return a file-like object that the decoded data can be read() from + +class GzipDecodedResponse(gzip.GzipFile if gzip else object): + """a file-like object to decode a response encoded with the gzip + method, as described in RFC 1952. 
+ """ + def __init__(self, response): + #response doesn't support tell() and read(), required by + #GzipFile + if not gzip: + raise NotImplementedError + self.io = BytesIO(response.read()) + gzip.GzipFile.__init__(self, mode="rb", fileobj=self.io) + + def close(self): + gzip.GzipFile.close(self) + self.io.close() + + +# -------------------------------------------------------------------- +# request dispatcher + +class _Method(object): + # some magic to bind an XML-RPC method to an RPC server. + # supports "nested" methods (e.g. examples.getStateName) + def __init__(self, send, name): + self.__send = send + self.__name = name + def __getattr__(self, name): + return _Method(self.__send, "%s.%s" % (self.__name, name)) + def __call__(self, *args): + return self.__send(self.__name, args) + +## +# Standard transport class for XML-RPC over HTTP. +#

    +# You can create custom transports by subclassing this method, and +# overriding selected methods. + +class Transport(object): + """Handles an HTTP transaction to an XML-RPC server.""" + + # client identifier (may be overridden) + user_agent = "Python-xmlrpc/%s" % __version__ + + #if true, we'll request gzip encoding + accept_gzip_encoding = True + + # if positive, encode request using gzip if it exceeds this threshold + # note that many server will get confused, so only use it if you know + # that they can decode such a request + encode_threshold = None #None = don't encode + + def __init__(self, use_datetime=False, use_builtin_types=False): + self._use_datetime = use_datetime + self._use_builtin_types = use_builtin_types + self._connection = (None, None) + self._extra_headers = [] + + ## + # Send a complete request, and parse the response. + # Retry request if a cached connection has disconnected. + # + # @param host Target host. + # @param handler Target PRC handler. + # @param request_body XML-RPC request body. + # @param verbose Debugging flag. + # @return Parsed response. + + def request(self, host, handler, request_body, verbose=False): + #retry request once if cached connection has gone cold + for i in (0, 1): + try: + return self.single_request(host, handler, request_body, verbose) + except socket.error as e: + if i or e.errno not in (errno.ECONNRESET, errno.ECONNABORTED, errno.EPIPE): + raise + except http_client.BadStatusLine: #close after we sent request + if i: + raise + + def single_request(self, host, handler, request_body, verbose=False): + # issue XML-RPC request + try: + http_conn = self.send_request(host, handler, request_body, verbose) + resp = http_conn.getresponse() + if resp.status == 200: + self.verbose = verbose + return self.parse_response(resp) + + except Fault: + raise + except Exception: + #All unexpected errors leave connection in + # a strange state, so we clear it. + self.close() + raise + + #We got an error response. 
+ #Discard any response data and raise exception + if resp.getheader("content-length", ""): + resp.read() + raise ProtocolError( + host + handler, + resp.status, resp.reason, + dict(resp.getheaders()) + ) + + + ## + # Create parser. + # + # @return A 2-tuple containing a parser and a unmarshaller. + + def getparser(self): + # get parser and unmarshaller + return getparser(use_datetime=self._use_datetime, + use_builtin_types=self._use_builtin_types) + + ## + # Get authorization info from host parameter + # Host may be a string, or a (host, x509-dict) tuple; if a string, + # it is checked for a "user:pw@host" format, and a "Basic + # Authentication" header is added if appropriate. + # + # @param host Host descriptor (URL or (URL, x509 info) tuple). + # @return A 3-tuple containing (actual host, extra headers, + # x509 info). The header and x509 fields may be None. + + def get_host_info(self, host): + + x509 = {} + if isinstance(host, tuple): + host, x509 = host + + auth, host = urllib_parse.splituser(host) + + if auth: + auth = urllib_parse.unquote_to_bytes(auth) + auth = base64.encodebytes(auth).decode("utf-8") + auth = "".join(auth.split()) # get rid of whitespace + extra_headers = [ + ("Authorization", "Basic " + auth) + ] + else: + extra_headers = [] + + return host, extra_headers, x509 + + ## + # Connect to server. + # + # @param host Target host. + # @return An HTTPConnection object + + def make_connection(self, host): + #return an existing connection if possible. This allows + #HTTP/1.1 keep-alive. + if self._connection and host == self._connection[0]: + return self._connection[1] + # create a HTTP connection object from a host descriptor + chost, self._extra_headers, x509 = self.get_host_info(host) + self._connection = host, http_client.HTTPConnection(chost) + return self._connection[1] + + ## + # Clear any cached connection object. + # Used in the event of socket errors. 
+ # + def close(self): + if self._connection[1]: + self._connection[1].close() + self._connection = (None, None) + + ## + # Send HTTP request. + # + # @param host Host descriptor (URL or (URL, x509 info) tuple). + # @param handler Target RPC handler (a path relative to host) + # @param request_body The XML-RPC request body + # @param debug Enable debugging if debug is true. + # @return An HTTPConnection. + + def send_request(self, host, handler, request_body, debug): + connection = self.make_connection(host) + headers = self._extra_headers[:] + if debug: + connection.set_debuglevel(1) + if self.accept_gzip_encoding and gzip: + connection.putrequest("POST", handler, skip_accept_encoding=True) + headers.append(("Accept-Encoding", "gzip")) + else: + connection.putrequest("POST", handler) + headers.append(("Content-Type", "text/xml")) + headers.append(("User-Agent", self.user_agent)) + self.send_headers(connection, headers) + self.send_content(connection, request_body) + return connection + + ## + # Send request headers. + # This function provides a useful hook for subclassing + # + # @param connection httpConnection. + # @param headers list of key,value pairs for HTTP headers + + def send_headers(self, connection, headers): + for key, val in headers: + connection.putheader(key, val) + + ## + # Send request body. + # This function provides a useful hook for subclassing + # + # @param connection httpConnection. + # @param request_body XML-RPC request body. + + def send_content(self, connection, request_body): + #optionally encode the request + if (self.encode_threshold is not None and + self.encode_threshold < len(request_body) and + gzip): + connection.putheader("Content-Encoding", "gzip") + request_body = gzip_encode(request_body) + + connection.putheader("Content-Length", str(len(request_body))) + connection.endheaders(request_body) + + ## + # Parse response. + # + # @param file Stream. + # @return Response tuple and target method. 
+ + def parse_response(self, response): + # read response data from httpresponse, and parse it + # Check for new http response object, otherwise it is a file object. + if hasattr(response, 'getheader'): + if response.getheader("Content-Encoding", "") == "gzip": + stream = GzipDecodedResponse(response) + else: + stream = response + else: + stream = response + + p, u = self.getparser() + + while 1: + data = stream.read(1024) + if not data: + break + if self.verbose: + print("body:", repr(data)) + p.feed(data) + + if stream is not response: + stream.close() + p.close() + + return u.close() + +## +# Standard transport class for XML-RPC over HTTPS. + +class SafeTransport(Transport): + """Handles an HTTPS transaction to an XML-RPC server.""" + + # FIXME: mostly untested + + def make_connection(self, host): + if self._connection and host == self._connection[0]: + return self._connection[1] + + if not hasattr(http_client, "HTTPSConnection"): + raise NotImplementedError( + "your version of http.client doesn't support HTTPS") + # create a HTTPS connection object from a host descriptor + # host may be a string, or a (host, x509-dict) tuple + chost, self._extra_headers, x509 = self.get_host_info(host) + self._connection = host, http_client.HTTPSConnection(chost, + None, **(x509 or {})) + return self._connection[1] + +## +# Standard server proxy. This class establishes a virtual connection +# to an XML-RPC server. +#

    +# This class is available as ServerProxy and Server. New code should +# use ServerProxy, to avoid confusion. +# +# @def ServerProxy(uri, **options) +# @param uri The connection point on the server. +# @keyparam transport A transport factory, compatible with the +# standard transport class. +# @keyparam encoding The default encoding used for 8-bit strings +# (default is UTF-8). +# @keyparam verbose Use a true value to enable debugging output. +# (printed to standard output). +# @see Transport + +class ServerProxy(object): + """uri [,options] -> a logical connection to an XML-RPC server + + uri is the connection point on the server, given as + scheme://host/target. + + The standard implementation always supports the "http" scheme. If + SSL socket support is available (Python 2.0), it also supports + "https". + + If the target part and the slash preceding it are both omitted, + "/RPC2" is assumed. + + The following options can be given as keyword arguments: + + transport: a transport factory + encoding: the request encoding (default is UTF-8) + + All 8-bit strings passed to the server proxy are assumed to use + the given encoding. 
+ """ + + def __init__(self, uri, transport=None, encoding=None, verbose=False, + allow_none=False, use_datetime=False, use_builtin_types=False): + # establish a "logical" server connection + + # get the url + type, uri = urllib_parse.splittype(uri) + if type not in ("http", "https"): + raise IOError("unsupported XML-RPC protocol") + self.__host, self.__handler = urllib_parse.splithost(uri) + if not self.__handler: + self.__handler = "/RPC2" + + if transport is None: + if type == "https": + handler = SafeTransport + else: + handler = Transport + transport = handler(use_datetime=use_datetime, + use_builtin_types=use_builtin_types) + self.__transport = transport + + self.__encoding = encoding or 'utf-8' + self.__verbose = verbose + self.__allow_none = allow_none + + def __close(self): + self.__transport.close() + + def __request(self, methodname, params): + # call a method on the remote server + + request = dumps(params, methodname, encoding=self.__encoding, + allow_none=self.__allow_none).encode(self.__encoding) + + response = self.__transport.request( + self.__host, + self.__handler, + request, + verbose=self.__verbose + ) + + if len(response) == 1: + response = response[0] + + return response + + def __repr__(self): + return ( + "" % + (self.__host, self.__handler) + ) + + __str__ = __repr__ + + def __getattr__(self, name): + # magic method dispatcher + return _Method(self.__request, name) + + # note: to call a remote object with an non-standard name, use + # result getattr(server, "strange-python-name")(args) + + def __call__(self, attr): + """A workaround to get special attributes on the ServerProxy + without interfering with the magic __getattr__ + """ + if attr == "close": + return self.__close + elif attr == "transport": + return self.__transport + raise AttributeError("Attribute %r not found" % (attr,)) + +# compatibility + +Server = ServerProxy + +# -------------------------------------------------------------------- +# test code + +if __name__ == 
"__main__": + + # simple test program (from the XML-RPC specification) + + # local server, available from Lib/xmlrpc/server.py + server = ServerProxy("http://localhost:8000") + + try: + print(server.currentTime.getCurrentTime()) + except Error as v: + print("ERROR", v) + + multi = MultiCall(server) + multi.getData() + multi.pow(2,9) + multi.add(1,2) + try: + for response in multi(): + print(response) + except Error as v: + print("ERROR", v) diff --git a/src/future/backports/xmlrpc/server.py b/src/future/backports/xmlrpc/server.py new file mode 100644 index 00000000..28072bfe --- /dev/null +++ b/src/future/backports/xmlrpc/server.py @@ -0,0 +1,999 @@ +r""" +Ported using Python-Future from the Python 3.3 standard library. + +XML-RPC Servers. + +This module can be used to create simple XML-RPC servers +by creating a server and either installing functions, a +class instance, or by extending the SimpleXMLRPCServer +class. + +It can also be used to handle XML-RPC requests in a CGI +environment using CGIXMLRPCRequestHandler. + +The Doc* classes can be used to create XML-RPC servers that +serve pydoc-style documentation in response to HTTP +GET requests. This documentation is dynamically generated +based on the functions and methods registered with the +server. + +A list of possible usage patterns follows: + +1. Install functions: + +server = SimpleXMLRPCServer(("localhost", 8000)) +server.register_function(pow) +server.register_function(lambda x,y: x+y, 'add') +server.serve_forever() + +2. Install an instance: + +class MyFuncs: + def __init__(self): + # make all of the sys functions available through sys.func_name + import sys + self.sys = sys + def _listMethods(self): + # implement this method so that system.listMethods + # knows to advertise the sys methods + return list_public_methods(self) + \ + ['sys.' 
+ method for method in list_public_methods(self.sys)] + def pow(self, x, y): return pow(x, y) + def add(self, x, y) : return x + y + +server = SimpleXMLRPCServer(("localhost", 8000)) +server.register_introspection_functions() +server.register_instance(MyFuncs()) +server.serve_forever() + +3. Install an instance with custom dispatch method: + +class Math: + def _listMethods(self): + # this method must be present for system.listMethods + # to work + return ['add', 'pow'] + def _methodHelp(self, method): + # this method must be present for system.methodHelp + # to work + if method == 'add': + return "add(2,3) => 5" + elif method == 'pow': + return "pow(x, y[, z]) => number" + else: + # By convention, return empty + # string if no help is available + return "" + def _dispatch(self, method, params): + if method == 'pow': + return pow(*params) + elif method == 'add': + return params[0] + params[1] + else: + raise ValueError('bad method') + +server = SimpleXMLRPCServer(("localhost", 8000)) +server.register_introspection_functions() +server.register_instance(Math()) +server.serve_forever() + +4. Subclass SimpleXMLRPCServer: + +class MathServer(SimpleXMLRPCServer): + def _dispatch(self, method, params): + try: + # We are forcing the 'export_' prefix on methods that are + # callable through XML-RPC to prevent potential security + # problems + func = getattr(self, 'export_' + method) + except AttributeError: + raise Exception('method "%s" is not supported' % method) + else: + return func(*params) + + def export_add(self, x, y): + return x + y + +server = MathServer(("localhost", 8000)) +server.serve_forever() + +5. CGI script: + +server = CGIXMLRPCRequestHandler() +server.register_function(pow) +server.handle_request() +""" + +from __future__ import absolute_import, division, print_function, unicode_literals +from future.builtins import int, str + +# Written by Brian Quinlan (brian@sweetapp.com). +# Based on code written by Fredrik Lundh. 
+ +from future.backports.xmlrpc.client import Fault, dumps, loads, gzip_encode, gzip_decode +from future.backports.http.server import BaseHTTPRequestHandler +import future.backports.http.server as http_server +from future.backports import socketserver +import sys +import os +import re +import pydoc +import inspect +import traceback +try: + import fcntl +except ImportError: + fcntl = None + +def resolve_dotted_attribute(obj, attr, allow_dotted_names=True): + """resolve_dotted_attribute(a, 'b.c.d') => a.b.c.d + + Resolves a dotted attribute name to an object. Raises + an AttributeError if any attribute in the chain starts with a '_'. + + If the optional allow_dotted_names argument is false, dots are not + supported and this function operates similar to getattr(obj, attr). + """ + + if allow_dotted_names: + attrs = attr.split('.') + else: + attrs = [attr] + + for i in attrs: + if i.startswith('_'): + raise AttributeError( + 'attempt to access private attribute "%s"' % i + ) + else: + obj = getattr(obj,i) + return obj + +def list_public_methods(obj): + """Returns a list of attribute strings, found in the specified + object, which represent callable attributes""" + + return [member for member in dir(obj) + if not member.startswith('_') and + callable(getattr(obj, member))] + +class SimpleXMLRPCDispatcher(object): + """Mix-in class that dispatches XML-RPC requests. + + This class is used to register XML-RPC method handlers + and then to dispatch them. This class doesn't need to be + instanced directly when used by SimpleXMLRPCServer but it + can be instanced when used by the MultiPathXMLRPCServer + """ + + def __init__(self, allow_none=False, encoding=None, + use_builtin_types=False): + self.funcs = {} + self.instance = None + self.allow_none = allow_none + self.encoding = encoding or 'utf-8' + self.use_builtin_types = use_builtin_types + + def register_instance(self, instance, allow_dotted_names=False): + """Registers an instance to respond to XML-RPC requests. 
+ + Only one instance can be installed at a time. + + If the registered instance has a _dispatch method then that + method will be called with the name of the XML-RPC method and + its parameters as a tuple + e.g. instance._dispatch('add',(2,3)) + + If the registered instance does not have a _dispatch method + then the instance will be searched to find a matching method + and, if found, will be called. Methods beginning with an '_' + are considered private and will not be called by + SimpleXMLRPCServer. + + If a registered function matches a XML-RPC request, then it + will be called instead of the registered instance. + + If the optional allow_dotted_names argument is true and the + instance does not have a _dispatch method, method names + containing dots are supported and resolved, as long as none of + the name segments start with an '_'. + + *** SECURITY WARNING: *** + + Enabling the allow_dotted_names options allows intruders + to access your module's global variables and may allow + intruders to execute arbitrary code on your machine. Only + use this option on a secure, closed network. + + """ + + self.instance = instance + self.allow_dotted_names = allow_dotted_names + + def register_function(self, function, name=None): + """Registers a function to respond to XML-RPC requests. + + The optional name argument can be used to set a Unicode name + for the function. + """ + + if name is None: + name = function.__name__ + self.funcs[name] = function + + def register_introspection_functions(self): + """Registers the XML-RPC introspection methods in the system + namespace. + + see http://xmlrpc.usefulinc.com/doc/reserved.html + """ + + self.funcs.update({'system.listMethods' : self.system_listMethods, + 'system.methodSignature' : self.system_methodSignature, + 'system.methodHelp' : self.system_methodHelp}) + + def register_multicall_functions(self): + """Registers the XML-RPC multicall method in the system + namespace. 
+ + see http://www.xmlrpc.com/discuss/msgReader$1208""" + + self.funcs.update({'system.multicall' : self.system_multicall}) + + def _marshaled_dispatch(self, data, dispatch_method = None, path = None): + """Dispatches an XML-RPC method from marshalled (XML) data. + + XML-RPC methods are dispatched from the marshalled (XML) data + using the _dispatch method and the result is returned as + marshalled data. For backwards compatibility, a dispatch + function can be provided as an argument (see comment in + SimpleXMLRPCRequestHandler.do_POST) but overriding the + existing method through subclassing is the preferred means + of changing method dispatch behavior. + """ + + try: + params, method = loads(data, use_builtin_types=self.use_builtin_types) + + # generate response + if dispatch_method is not None: + response = dispatch_method(method, params) + else: + response = self._dispatch(method, params) + # wrap response in a singleton tuple + response = (response,) + response = dumps(response, methodresponse=1, + allow_none=self.allow_none, encoding=self.encoding) + except Fault as fault: + response = dumps(fault, allow_none=self.allow_none, + encoding=self.encoding) + except: + # report exception back to server + exc_type, exc_value, exc_tb = sys.exc_info() + response = dumps( + Fault(1, "%s:%s" % (exc_type, exc_value)), + encoding=self.encoding, allow_none=self.allow_none, + ) + + return response.encode(self.encoding) + + def system_listMethods(self): + """system.listMethods() => ['add', 'subtract', 'multiple'] + + Returns a list of the methods supported by the server.""" + + methods = set(self.funcs.keys()) + if self.instance is not None: + # Instance can implement _listMethod to return a list of + # methods + if hasattr(self.instance, '_listMethods'): + methods |= set(self.instance._listMethods()) + # if the instance has a _dispatch method then we + # don't have enough information to provide a list + # of methods + elif not hasattr(self.instance, '_dispatch'): + methods 
|= set(list_public_methods(self.instance)) + return sorted(methods) + + def system_methodSignature(self, method_name): + """system.methodSignature('add') => [double, int, int] + + Returns a list describing the signature of the method. In the + above example, the add method takes two integers as arguments + and returns a double result. + + This server does NOT support system.methodSignature.""" + + # See http://xmlrpc.usefulinc.com/doc/sysmethodsig.html + + return 'signatures not supported' + + def system_methodHelp(self, method_name): + """system.methodHelp('add') => "Adds two integers together" + + Returns a string containing documentation for the specified method.""" + + method = None + if method_name in self.funcs: + method = self.funcs[method_name] + elif self.instance is not None: + # Instance can implement _methodHelp to return help for a method + if hasattr(self.instance, '_methodHelp'): + return self.instance._methodHelp(method_name) + # if the instance has a _dispatch method then we + # don't have enough information to provide help + elif not hasattr(self.instance, '_dispatch'): + try: + method = resolve_dotted_attribute( + self.instance, + method_name, + self.allow_dotted_names + ) + except AttributeError: + pass + + # Note that we aren't checking that the method actually + # be a callable object of some kind + if method is None: + return "" + else: + return pydoc.getdoc(method) + + def system_multicall(self, call_list): + """system.multicall([{'methodName': 'add', 'params': [2, 2]}, ...]) => \ +[[4], ...] + + Allows the caller to package multiple XML-RPC calls into a single + request. + + See http://www.xmlrpc.com/discuss/msgReader$1208 + """ + + results = [] + for call in call_list: + method_name = call['methodName'] + params = call['params'] + + try: + # XXX A marshalling error in any response will fail the entire + # multicall. If someone cares they should fix this. 
+ results.append([self._dispatch(method_name, params)]) + except Fault as fault: + results.append( + {'faultCode' : fault.faultCode, + 'faultString' : fault.faultString} + ) + except: + exc_type, exc_value, exc_tb = sys.exc_info() + results.append( + {'faultCode' : 1, + 'faultString' : "%s:%s" % (exc_type, exc_value)} + ) + return results + + def _dispatch(self, method, params): + """Dispatches the XML-RPC method. + + XML-RPC calls are forwarded to a registered function that + matches the called XML-RPC method name. If no such function + exists then the call is forwarded to the registered instance, + if available. + + If the registered instance has a _dispatch method then that + method will be called with the name of the XML-RPC method and + its parameters as a tuple + e.g. instance._dispatch('add',(2,3)) + + If the registered instance does not have a _dispatch method + then the instance will be searched to find a matching method + and, if found, will be called. + + Methods beginning with an '_' are considered private and will + not be called. + """ + + func = None + try: + # check to see if a matching function has been registered + func = self.funcs[method] + except KeyError: + if self.instance is not None: + # check for a _dispatch method + if hasattr(self.instance, '_dispatch'): + return self.instance._dispatch(method, params) + else: + # call instance method directly + try: + func = resolve_dotted_attribute( + self.instance, + method, + self.allow_dotted_names + ) + except AttributeError: + pass + + if func is not None: + return func(*params) + else: + raise Exception('method "%s" is not supported' % method) + +class SimpleXMLRPCRequestHandler(BaseHTTPRequestHandler): + """Simple XML-RPC request handler class. + + Handles all HTTP POST requests and attempts to decode them as + XML-RPC requests. + """ + + # Class attribute listing the accessible path components; + # paths not on this list will result in a 404 error. 
+ rpc_paths = ('/', '/RPC2') + + #if not None, encode responses larger than this, if possible + encode_threshold = 1400 #a common MTU + + #Override form StreamRequestHandler: full buffering of output + #and no Nagle. + wbufsize = -1 + disable_nagle_algorithm = True + + # a re to match a gzip Accept-Encoding + aepattern = re.compile(r""" + \s* ([^\s;]+) \s* #content-coding + (;\s* q \s*=\s* ([0-9\.]+))? #q + """, re.VERBOSE | re.IGNORECASE) + + def accept_encodings(self): + r = {} + ae = self.headers.get("Accept-Encoding", "") + for e in ae.split(","): + match = self.aepattern.match(e) + if match: + v = match.group(3) + v = float(v) if v else 1.0 + r[match.group(1)] = v + return r + + def is_rpc_path_valid(self): + if self.rpc_paths: + return self.path in self.rpc_paths + else: + # If .rpc_paths is empty, just assume all paths are legal + return True + + def do_POST(self): + """Handles the HTTP POST request. + + Attempts to interpret all HTTP POST requests as XML-RPC calls, + which are forwarded to the server's _dispatch method for handling. + """ + + # Check that the path is legal + if not self.is_rpc_path_valid(): + self.report_404() + return + + try: + # Get arguments by reading body of request. + # We read this in chunks to avoid straining + # socket.read(); around the 10 or 15Mb mark, some platforms + # begin to have problems (bug #792570). + max_chunk_size = 10*1024*1024 + size_remaining = int(self.headers["content-length"]) + L = [] + while size_remaining: + chunk_size = min(size_remaining, max_chunk_size) + chunk = self.rfile.read(chunk_size) + if not chunk: + break + L.append(chunk) + size_remaining -= len(L[-1]) + data = b''.join(L) + + data = self.decode_request_content(data) + if data is None: + return #response has been sent + + # In previous versions of SimpleXMLRPCServer, _dispatch + # could be overridden in this class, instead of in + # SimpleXMLRPCDispatcher. 
To maintain backwards compatibility, + # check to see if a subclass implements _dispatch and dispatch + # using that method if present. + response = self.server._marshaled_dispatch( + data, getattr(self, '_dispatch', None), self.path + ) + except Exception as e: # This should only happen if the module is buggy + # internal error, report as HTTP server error + self.send_response(500) + + # Send information about the exception if requested + if hasattr(self.server, '_send_traceback_header') and \ + self.server._send_traceback_header: + self.send_header("X-exception", str(e)) + trace = traceback.format_exc() + trace = str(trace.encode('ASCII', 'backslashreplace'), 'ASCII') + self.send_header("X-traceback", trace) + + self.send_header("Content-length", "0") + self.end_headers() + else: + self.send_response(200) + self.send_header("Content-type", "text/xml") + if self.encode_threshold is not None: + if len(response) > self.encode_threshold: + q = self.accept_encodings().get("gzip", 0) + if q: + try: + response = gzip_encode(response) + self.send_header("Content-Encoding", "gzip") + except NotImplementedError: + pass + self.send_header("Content-length", str(len(response))) + self.end_headers() + self.wfile.write(response) + + def decode_request_content(self, data): + #support gzip encoding of request + encoding = self.headers.get("content-encoding", "identity").lower() + if encoding == "identity": + return data + if encoding == "gzip": + try: + return gzip_decode(data) + except NotImplementedError: + self.send_response(501, "encoding %r not supported" % encoding) + except ValueError: + self.send_response(400, "error decoding gzip content") + else: + self.send_response(501, "encoding %r not supported" % encoding) + self.send_header("Content-length", "0") + self.end_headers() + + def report_404 (self): + # Report a 404 error + self.send_response(404) + response = b'No such page' + self.send_header("Content-type", "text/plain") + self.send_header("Content-length", 
str(len(response))) + self.end_headers() + self.wfile.write(response) + + def log_request(self, code='-', size='-'): + """Selectively log an accepted request.""" + + if self.server.logRequests: + BaseHTTPRequestHandler.log_request(self, code, size) + +class SimpleXMLRPCServer(socketserver.TCPServer, + SimpleXMLRPCDispatcher): + """Simple XML-RPC server. + + Simple XML-RPC server that allows functions and a single instance + to be installed to handle requests. The default implementation + attempts to dispatch XML-RPC calls to the functions or instance + installed in the server. Override the _dispatch method inherited + from SimpleXMLRPCDispatcher to change this behavior. + """ + + allow_reuse_address = True + + # Warning: this is for debugging purposes only! Never set this to True in + # production code, as will be sending out sensitive information (exception + # and stack trace details) when exceptions are raised inside + # SimpleXMLRPCRequestHandler.do_POST + _send_traceback_header = False + + def __init__(self, addr, requestHandler=SimpleXMLRPCRequestHandler, + logRequests=True, allow_none=False, encoding=None, + bind_and_activate=True, use_builtin_types=False): + self.logRequests = logRequests + + SimpleXMLRPCDispatcher.__init__(self, allow_none, encoding, use_builtin_types) + socketserver.TCPServer.__init__(self, addr, requestHandler, bind_and_activate) + + # [Bug #1222790] If possible, set close-on-exec flag; if a + # method spawns a subprocess, the subprocess shouldn't have + # the listening socket open. + if fcntl is not None and hasattr(fcntl, 'FD_CLOEXEC'): + flags = fcntl.fcntl(self.fileno(), fcntl.F_GETFD) + flags |= fcntl.FD_CLOEXEC + fcntl.fcntl(self.fileno(), fcntl.F_SETFD, flags) + +class MultiPathXMLRPCServer(SimpleXMLRPCServer): + """Multipath XML-RPC Server + This specialization of SimpleXMLRPCServer allows the user to create + multiple Dispatcher instances and assign them to different + HTTP request paths. 
This makes it possible to run two or more + 'virtual XML-RPC servers' at the same port. + Make sure that the requestHandler accepts the paths in question. + """ + def __init__(self, addr, requestHandler=SimpleXMLRPCRequestHandler, + logRequests=True, allow_none=False, encoding=None, + bind_and_activate=True, use_builtin_types=False): + + SimpleXMLRPCServer.__init__(self, addr, requestHandler, logRequests, allow_none, + encoding, bind_and_activate, use_builtin_types) + self.dispatchers = {} + self.allow_none = allow_none + self.encoding = encoding or 'utf-8' + + def add_dispatcher(self, path, dispatcher): + self.dispatchers[path] = dispatcher + return dispatcher + + def get_dispatcher(self, path): + return self.dispatchers[path] + + def _marshaled_dispatch(self, data, dispatch_method = None, path = None): + try: + response = self.dispatchers[path]._marshaled_dispatch( + data, dispatch_method, path) + except: + # report low level exception back to server + # (each dispatcher should have handled their own + # exceptions) + exc_type, exc_value = sys.exc_info()[:2] + response = dumps( + Fault(1, "%s:%s" % (exc_type, exc_value)), + encoding=self.encoding, allow_none=self.allow_none) + response = response.encode(self.encoding) + return response + +class CGIXMLRPCRequestHandler(SimpleXMLRPCDispatcher): + """Simple handler for XML-RPC data passed through CGI.""" + + def __init__(self, allow_none=False, encoding=None, use_builtin_types=False): + SimpleXMLRPCDispatcher.__init__(self, allow_none, encoding, use_builtin_types) + + def handle_xmlrpc(self, request_text): + """Handle a single XML-RPC request""" + + response = self._marshaled_dispatch(request_text) + + print('Content-Type: text/xml') + print('Content-Length: %d' % len(response)) + print() + sys.stdout.flush() + sys.stdout.buffer.write(response) + sys.stdout.buffer.flush() + + def handle_get(self): + """Handle a single HTTP GET request. 
+ + Default implementation indicates an error because + XML-RPC uses the POST method. + """ + + code = 400 + message, explain = BaseHTTPRequestHandler.responses[code] + + response = http_server.DEFAULT_ERROR_MESSAGE % \ + { + 'code' : code, + 'message' : message, + 'explain' : explain + } + response = response.encode('utf-8') + print('Status: %d %s' % (code, message)) + print('Content-Type: %s' % http_server.DEFAULT_ERROR_CONTENT_TYPE) + print('Content-Length: %d' % len(response)) + print() + sys.stdout.flush() + sys.stdout.buffer.write(response) + sys.stdout.buffer.flush() + + def handle_request(self, request_text=None): + """Handle a single XML-RPC request passed through a CGI post method. + + If no XML data is given then it is read from stdin. The resulting + XML-RPC response is printed to stdout along with the correct HTTP + headers. + """ + + if request_text is None and \ + os.environ.get('REQUEST_METHOD', None) == 'GET': + self.handle_get() + else: + # POST data is normally available through stdin + try: + length = int(os.environ.get('CONTENT_LENGTH', None)) + except (ValueError, TypeError): + length = -1 + if request_text is None: + request_text = sys.stdin.read(length) + + self.handle_xmlrpc(request_text) + + +# ----------------------------------------------------------------------------- +# Self documenting XML-RPC Server. + +class ServerHTMLDoc(pydoc.HTMLDoc): + """Class used to generate pydoc HTML document for a server""" + + def markup(self, text, escape=None, funcs={}, classes={}, methods={}): + """Mark up some plain text, given a context of symbols to look for. + Each context dictionary maps object names to anchor names.""" + escape = escape or self.escape + results = [] + here = 0 + + # XXX Note that this regular expression does not allow for the + # hyperlinking of arbitrary strings being used as method + # names. Only methods with names consisting of word characters + # and '.'s are hyperlinked. 
+        pattern = re.compile(r'\b((http|ftp)://\S+[\w/]|'
+                                r'RFC[- ]?(\d+)|'
+                                r'PEP[- ]?(\d+)|'
+                                r'(self\.)?((?:\w|\.)+))\b')
+        while 1:
+            match = pattern.search(text, here)
+            if not match: break
+            start, end = match.span()
+            results.append(escape(text[here:start]))
+
+            all, scheme, rfc, pep, selfdot, name = match.groups()
+            if scheme:
+                url = escape(all).replace('"', '&quot;')
+                results.append('<a href="%s">%s</a>' % (url, url))
+            elif rfc:
+                url = 'http://www.rfc-editor.org/rfc/rfc%d.txt' % int(rfc)
+                results.append('<a href="%s">%s</a>' % (url, escape(all)))
+            elif pep:
+                url = 'http://www.python.org/dev/peps/pep-%04d/' % int(pep)
+                results.append('<a href="%s">%s</a>' % (url, escape(all)))
+            elif text[end:end+1] == '(':
+                results.append(self.namelink(name, methods, funcs, classes))
+            elif selfdot:
+                results.append('self.<strong>%s</strong>' % name)
+            else:
+                results.append(self.namelink(name, classes))
+            here = end
+        results.append(escape(text[here:]))
+        return ''.join(results)
+
+    def docroutine(self, object, name, mod=None,
+                   funcs={}, classes={}, methods={}, cl=None):
+        """Produce HTML documentation for a function or method object."""
+
+        anchor = (cl and cl.__name__ or '') + '-' + name
+        note = ''
+
+        title = '<a name="%s"><strong>%s</strong></a>' % (
+            self.escape(anchor), self.escape(name))
+
+        if inspect.ismethod(object):
+            args = inspect.getfullargspec(object)
+            # exclude the argument bound to the instance, it will be
+            # confusing to the non-Python user
+            argspec = inspect.formatargspec (
+                    args.args[1:],
+                    args.varargs,
+                    args.varkw,
+                    args.defaults,
+                    annotations=args.annotations,
+                    formatvalue=self.formatvalue
+                )
+        elif inspect.isfunction(object):
+            args = inspect.getfullargspec(object)
+            argspec = inspect.formatargspec(
+                args.args, args.varargs, args.varkw, args.defaults,
+                annotations=args.annotations,
+                formatvalue=self.formatvalue)
+        else:
+            argspec = '(...)'
+
+        if isinstance(object, tuple):
+            argspec = object[0] or argspec
+            docstring = object[1] or ""
+        else:
+            docstring = pydoc.getdoc(object)
+
+        decl = title + argspec + (note and self.grey(
+               '<font face="helvetica, arial">%s</font>' % note))
+
+        doc = self.markup(
+            docstring, self.preformat, funcs, classes, methods)
+        doc = doc and '<dd><tt>%s</tt></dd>' % doc
+        return '<dl><dt>%s</dt>%s</dl>\n' % (decl, doc)
+
+    def docserver(self, server_name, package_documentation, methods):
+        """Produce HTML documentation for an XML-RPC server."""
+
+        fdict = {}
+        for key, value in methods.items():
+            fdict[key] = '#-' + key
+            fdict[value] = fdict[key]
+
+        server_name = self.escape(server_name)
+        head = '<big><big><strong>%s</strong></big></big>' % server_name
+        result = self.heading(head, '#ffffff', '#7799ee')
+
+        doc = self.markup(package_documentation, self.preformat, fdict)
+        doc = doc and '<tt>%s</tt>' % doc
+        result = result + '<p>%s</p>\n' % doc
+
+        contents = []
+        method_items = sorted(methods.items())
+        for key, value in method_items:
+            contents.append(self.docroutine(value, key, funcs=fdict))
+        result = result + self.bigsection(
+            'Methods', '#ffffff', '#eeaa77', ''.join(contents))
+
+        return result
+
+class XMLRPCDocGenerator(object):
+    """Generates documentation for an XML-RPC server.
+
+    This class is designed as mix-in and should not
+    be constructed directly.
+    """
+
+    def __init__(self):
+        # setup variables used for HTML documentation
+        self.server_name = 'XML-RPC Server Documentation'
+        self.server_documentation = \
+            "This server exports the following methods through the XML-RPC "\
+            "protocol."
+        self.server_title = 'XML-RPC Server Documentation'
+
+    def set_server_title(self, server_title):
+        """Set the HTML title of the generated server documentation"""
+
+        self.server_title = server_title
+
+    def set_server_name(self, server_name):
+        """Set the name of the generated HTML server documentation"""
+
+        self.server_name = server_name
+
+    def set_server_documentation(self, server_documentation):
+        """Set the documentation string for the entire server."""
+
+        self.server_documentation = server_documentation
+
+    def generate_html_documentation(self):
+        """generate_html_documentation() => html documentation for the server
+
+        Generates HTML documentation for the server using introspection for
+        installed functions and instances that do not implement the
+        _dispatch method.
Alternatively, instances can choose to implement + the _get_method_argstring(method_name) method to provide the + argument string used in the documentation and the + _methodHelp(method_name) method to provide the help text used + in the documentation.""" + + methods = {} + + for method_name in self.system_listMethods(): + if method_name in self.funcs: + method = self.funcs[method_name] + elif self.instance is not None: + method_info = [None, None] # argspec, documentation + if hasattr(self.instance, '_get_method_argstring'): + method_info[0] = self.instance._get_method_argstring(method_name) + if hasattr(self.instance, '_methodHelp'): + method_info[1] = self.instance._methodHelp(method_name) + + method_info = tuple(method_info) + if method_info != (None, None): + method = method_info + elif not hasattr(self.instance, '_dispatch'): + try: + method = resolve_dotted_attribute( + self.instance, + method_name + ) + except AttributeError: + method = method_info + else: + method = method_info + else: + assert 0, "Could not find method in self.functions and no "\ + "instance installed" + + methods[method_name] = method + + documenter = ServerHTMLDoc() + documentation = documenter.docserver( + self.server_name, + self.server_documentation, + methods + ) + + return documenter.page(self.server_title, documentation) + +class DocXMLRPCRequestHandler(SimpleXMLRPCRequestHandler): + """XML-RPC and documentation request handler class. + + Handles all HTTP POST requests and attempts to decode them as + XML-RPC requests. + + Handles all HTTP GET requests and interprets them as requests + for documentation. + """ + + def do_GET(self): + """Handles the HTTP GET request. + + Interpret all HTTP GET requests as requests for server + documentation. 
+ """ + # Check that the path is legal + if not self.is_rpc_path_valid(): + self.report_404() + return + + response = self.server.generate_html_documentation().encode('utf-8') + self.send_response(200) + self.send_header("Content-type", "text/html") + self.send_header("Content-length", str(len(response))) + self.end_headers() + self.wfile.write(response) + +class DocXMLRPCServer( SimpleXMLRPCServer, + XMLRPCDocGenerator): + """XML-RPC and HTML documentation server. + + Adds the ability to serve server documentation to the capabilities + of SimpleXMLRPCServer. + """ + + def __init__(self, addr, requestHandler=DocXMLRPCRequestHandler, + logRequests=True, allow_none=False, encoding=None, + bind_and_activate=True, use_builtin_types=False): + SimpleXMLRPCServer.__init__(self, addr, requestHandler, logRequests, + allow_none, encoding, bind_and_activate, + use_builtin_types) + XMLRPCDocGenerator.__init__(self) + +class DocCGIXMLRPCRequestHandler( CGIXMLRPCRequestHandler, + XMLRPCDocGenerator): + """Handler for XML-RPC data and documentation requests passed through + CGI""" + + def handle_get(self): + """Handles the HTTP GET request. + + Interpret all HTTP GET requests as requests for server + documentation. 
+        """
+
+        response = self.generate_html_documentation().encode('utf-8')
+
+        print('Content-Type: text/html')
+        print('Content-Length: %d' % len(response))
+        print()
+        sys.stdout.flush()
+        sys.stdout.buffer.write(response)
+        sys.stdout.buffer.flush()
+
+    def __init__(self):
+        CGIXMLRPCRequestHandler.__init__(self)
+        XMLRPCDocGenerator.__init__(self)
+
+
+if __name__ == '__main__':
+    import datetime
+
+    class ExampleService:
+        def getData(self):
+            return '42'
+
+        class currentTime:
+            @staticmethod
+            def getCurrentTime():
+                return datetime.datetime.now()
+
+    server = SimpleXMLRPCServer(("localhost", 8000))
+    server.register_function(pow)
+    server.register_function(lambda x,y: x+y, 'add')
+    server.register_instance(ExampleService(), allow_dotted_names=True)
+    server.register_multicall_functions()
+    print('Serving XML-RPC on localhost port 8000')
+    print('It is advisable to run this example server within a secure, closed network.')
+    try:
+        server.serve_forever()
+    except KeyboardInterrupt:
+        print("\nKeyboard interrupt received, exiting.")
+        server.server_close()
+        sys.exit(0)
diff --git a/src/future/builtins/__init__.py b/src/future/builtins/__init__.py
new file mode 100644
index 00000000..1734cd45
--- /dev/null
+++ b/src/future/builtins/__init__.py
@@ -0,0 +1,51 @@
+"""
+A module that brings in equivalents of the new and modified Python 3
+builtins into Py2. Has no effect on Py3.
+
+See the docs `here <http://python-future.org/what-else.html>`_
+(``docs/what-else.rst``) for more information.
+
+"""
+
+from future.builtins.iterators import (filter, map, zip)
+# The isinstance import is no longer needed. We provide it only for
+# backward-compatibility with future v0.8.2. It will be removed in future v1.0.
+from future.builtins.misc import (ascii, chr, hex, input, isinstance, next, + oct, open, pow, round, super, max, min) +from future.utils import PY3 + +if PY3: + import builtins + bytes = builtins.bytes + dict = builtins.dict + int = builtins.int + list = builtins.list + object = builtins.object + range = builtins.range + str = builtins.str + __all__ = [] +else: + from future.types import (newbytes as bytes, + newdict as dict, + newint as int, + newlist as list, + newobject as object, + newrange as range, + newstr as str) +from future import utils + + +if not utils.PY3: + # We only import names that shadow the builtins on Py2. No other namespace + # pollution on Py2. + + # Only shadow builtins on Py2; no new names + __all__ = ['filter', 'map', 'zip', + 'ascii', 'chr', 'hex', 'input', 'next', 'oct', 'open', 'pow', + 'round', 'super', + 'bytes', 'dict', 'int', 'list', 'object', 'range', 'str', 'max', 'min' + ] + +else: + # No namespace pollution on Py3 + __all__ = [] diff --git a/future/builtins/disabled.py b/src/future/builtins/disabled.py similarity index 86% rename from future/builtins/disabled.py rename to src/future/builtins/disabled.py index 9d266ce6..f6d6ea9b 100644 --- a/future/builtins/disabled.py +++ b/src/future/builtins/disabled.py @@ -19,11 +19,11 @@ Note that both ``input()`` and ``raw_input()`` are among the disabled functions (in this module). Although ``input()`` exists as a builtin in Python 3, the Python 2 ``input()`` builtin is unsafe to use because it -can lead to shell injection. Therefore we shadow it by default, in case -someone forgets to import our replacement ``input()`` somehow and expects -Python 3 semantics. +can lead to shell injection. Therefore we shadow it by default upon ``from +future.builtins.disabled import *``, in case someone forgets to import our +replacement ``input()`` somehow and expects Python 3 semantics. 
-See the ``future.builtins.backports`` module for a working version of +See the ``future.builtins.misc`` module for a working version of ``input`` with Python 3 semantics. (Note that callable() is not among the functions disabled; this was diff --git a/future/builtins/iterators.py b/src/future/builtins/iterators.py similarity index 91% rename from future/builtins/iterators.py rename to src/future/builtins/iterators.py index 5bc94312..dff651e0 100644 --- a/future/builtins/iterators.py +++ b/src/future/builtins/iterators.py @@ -7,7 +7,7 @@ for i in range(10**15): pass - + for (a, b) in zip(range(10**15), range(-10**15, 0)): pass @@ -36,17 +36,17 @@ import itertools from future import utils -from future.builtins.backports import range if not utils.PY3: + filter = itertools.ifilter map = itertools.imap + from future.types import newrange as range zip = itertools.izip - filter = itertools.ifilter - __all__ = ['range', 'map', 'zip', 'filter'] + __all__ = ['filter', 'map', 'range', 'zip'] else: import builtins + filter = builtins.filter map = builtins.map + range = builtins.range zip = builtins.zip - filter = builtins.filter __all__ = [] - diff --git a/src/future/builtins/misc.py b/src/future/builtins/misc.py new file mode 100644 index 00000000..f86ce5f3 --- /dev/null +++ b/src/future/builtins/misc.py @@ -0,0 +1,135 @@ +""" +A module that brings in equivalents of various modified Python 3 builtins +into Py2. Has no effect on Py3. 
+ +The builtin functions are: + +- ``ascii`` (from Py2's future_builtins module) +- ``hex`` (from Py2's future_builtins module) +- ``oct`` (from Py2's future_builtins module) +- ``chr`` (equivalent to ``unichr`` on Py2) +- ``input`` (equivalent to ``raw_input`` on Py2) +- ``next`` (calls ``__next__`` if it exists, else ``next`` method) +- ``open`` (equivalent to io.open on Py2) +- ``super`` (backport of Py3's magic zero-argument super() function +- ``round`` (new "Banker's Rounding" behaviour from Py3) +- ``max`` (new default option from Py3.4) +- ``min`` (new default option from Py3.4) + +``isinstance`` is also currently exported for backwards compatibility +with v0.8.2, although this has been deprecated since v0.9. + + +input() +------- +Like the new ``input()`` function from Python 3 (without eval()), except +that it returns bytes. Equivalent to Python 2's ``raw_input()``. + +Warning: By default, importing this module *removes* the old Python 2 +input() function entirely from ``__builtin__`` for safety. This is +because forgetting to import the new ``input`` from ``future`` might +otherwise lead to a security vulnerability (shell injection) on Python 2. + +To restore it, you can retrieve it yourself from +``__builtin__._old_input``. + +Fortunately, ``input()`` seems to be seldom used in the wild in Python +2... + +""" + +from future import utils + + +if utils.PY2: + from io import open + from future_builtins import ascii, oct, hex + from __builtin__ import unichr as chr, pow as _builtin_pow + import __builtin__ + + # Only for backward compatibility with future v0.8.2: + isinstance = __builtin__.isinstance + + # Warning: Python 2's input() is unsafe and MUST not be able to be used + # accidentally by someone who expects Python 3 semantics but forgets + # to import it on Python 2. Versions of ``future`` prior to 0.11 + # deleted it from __builtin__. Now we keep in __builtin__ but shadow + # the name like all others. Just be sure to import ``input``. 
+ + input = raw_input + + from future.builtins.newnext import newnext as next + from future.builtins.newround import newround as round + from future.builtins.newsuper import newsuper as super + from future.builtins.new_min_max import newmax as max + from future.builtins.new_min_max import newmin as min + from future.types.newint import newint + + _SENTINEL = object() + + def pow(x, y, z=_SENTINEL): + """ + pow(x, y[, z]) -> number + + With two arguments, equivalent to x**y. With three arguments, + equivalent to (x**y) % z, but may be more efficient (e.g. for ints). + """ + # Handle newints + if isinstance(x, newint): + x = long(x) + if isinstance(y, newint): + y = long(y) + if isinstance(z, newint): + z = long(z) + + try: + if z == _SENTINEL: + return _builtin_pow(x, y) + else: + return _builtin_pow(x, y, z) + except ValueError: + if z == _SENTINEL: + return _builtin_pow(x+0j, y) + else: + return _builtin_pow(x+0j, y, z) + + + # ``future`` doesn't support Py3.0/3.1. If we ever did, we'd add this: + # callable = __builtin__.callable + + __all__ = ['ascii', 'chr', 'hex', 'input', 'isinstance', 'next', 'oct', + 'open', 'pow', 'round', 'super', 'max', 'min'] + +else: + import builtins + ascii = builtins.ascii + chr = builtins.chr + hex = builtins.hex + input = builtins.input + next = builtins.next + # Only for backward compatibility with future v0.8.2: + isinstance = builtins.isinstance + oct = builtins.oct + open = builtins.open + pow = builtins.pow + round = builtins.round + super = builtins.super + if utils.PY34_PLUS: + max = builtins.max + min = builtins.min + __all__ = [] + else: + from future.builtins.new_min_max import newmax as max + from future.builtins.new_min_max import newmin as min + __all__ = ['min', 'max'] + + # The callable() function was removed from Py3.0 and 3.1 and + # reintroduced into Py3.2+. ``future`` doesn't support Py3.0/3.1. 
If we ever + # did, we'd add this: + # try: + # callable = builtins.callable + # except AttributeError: + # # Definition from Pandas + # def callable(obj): + # return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) + # __all__.append('callable') diff --git a/src/future/builtins/new_min_max.py b/src/future/builtins/new_min_max.py new file mode 100644 index 00000000..6f0c2a86 --- /dev/null +++ b/src/future/builtins/new_min_max.py @@ -0,0 +1,59 @@ +import itertools + +from future import utils +if utils.PY2: + from __builtin__ import max as _builtin_max, min as _builtin_min +else: + from builtins import max as _builtin_max, min as _builtin_min + +_SENTINEL = object() + + +def newmin(*args, **kwargs): + return new_min_max(_builtin_min, *args, **kwargs) + + +def newmax(*args, **kwargs): + return new_min_max(_builtin_max, *args, **kwargs) + + +def new_min_max(_builtin_func, *args, **kwargs): + """ + To support the argument "default" introduced in python 3.4 for min and max + :param _builtin_func: builtin min or builtin max + :param args: + :param kwargs: + :return: returns the min or max based on the arguments passed + """ + + for key, _ in kwargs.items(): + if key not in set(['key', 'default']): + raise TypeError('Illegal argument %s', key) + + if len(args) == 0: + raise TypeError + + if len(args) != 1 and kwargs.get('default', _SENTINEL) is not _SENTINEL: + raise TypeError + + if len(args) == 1: + iterator = iter(args[0]) + try: + first = next(iterator) + except StopIteration: + if kwargs.get('default', _SENTINEL) is not _SENTINEL: + return kwargs.get('default') + else: + raise ValueError('{}() arg is an empty sequence'.format(_builtin_func.__name__)) + else: + iterator = itertools.chain([first], iterator) + if kwargs.get('key') is not None: + return _builtin_func(iterator, key=kwargs.get('key')) + else: + return _builtin_func(iterator) + + if len(args) > 1: + if kwargs.get('key') is not None: + return _builtin_func(args, key=kwargs.get('key')) + else: 
+ return _builtin_func(args) diff --git a/src/future/builtins/newnext.py b/src/future/builtins/newnext.py new file mode 100644 index 00000000..097638ac --- /dev/null +++ b/src/future/builtins/newnext.py @@ -0,0 +1,70 @@ +''' +This module provides a newnext() function in Python 2 that mimics the +behaviour of ``next()`` in Python 3, falling back to Python 2's behaviour for +compatibility if this fails. + +``newnext(iterator)`` calls the iterator's ``__next__()`` method if it exists. If this +doesn't exist, it falls back to calling a ``next()`` method. + +For example: + + >>> class Odds(object): + ... def __init__(self, start=1): + ... self.value = start - 2 + ... def __next__(self): # note the Py3 interface + ... self.value += 2 + ... return self.value + ... def __iter__(self): + ... return self + ... + >>> iterator = Odds() + >>> next(iterator) + 1 + >>> next(iterator) + 3 + +If you are defining your own custom iterator class as above, it is preferable +to explicitly decorate the class with the @implements_iterator decorator from +``future.utils`` as follows: + + >>> @implements_iterator + ... class Odds(object): + ... # etc + ... pass + +This next() function is primarily for consuming iterators defined in Python 3 +code elsewhere that we would like to run on Python 2 or 3. +''' + +_builtin_next = next + +_SENTINEL = object() + +def newnext(iterator, default=_SENTINEL): + """ + next(iterator[, default]) + + Return the next item from the iterator. If default is given and the iterator + is exhausted, it is returned instead of raising StopIteration. 
+ """ + + # args = [] + # if default is not _SENTINEL: + # args.append(default) + try: + try: + return iterator.__next__() + except AttributeError: + try: + return iterator.next() + except AttributeError: + raise TypeError("'{0}' object is not an iterator".format( + iterator.__class__.__name__)) + except StopIteration as e: + if default is _SENTINEL: + raise e + else: + return default + + +__all__ = ['newnext'] diff --git a/src/future/builtins/newround.py b/src/future/builtins/newround.py new file mode 100644 index 00000000..b06c1169 --- /dev/null +++ b/src/future/builtins/newround.py @@ -0,0 +1,105 @@ +""" +``python-future``: pure Python implementation of Python 3 round(). +""" + +from __future__ import division +from future.utils import PYPY, PY26, bind_method + +# Use the decimal module for simplicity of implementation (and +# hopefully correctness). +from decimal import Decimal, ROUND_HALF_EVEN + + +def newround(number, ndigits=None): + """ + See Python 3 documentation: uses Banker's Rounding. + + Delegates to the __round__ method if for some reason this exists. + + If not, rounds a number to a given precision in decimal digits (default + 0 digits). This returns an int when called with one argument, + otherwise the same type as the number. ndigits may be negative. + + See the test_round method in future/tests/test_builtins.py for + examples. 
+ """ + return_int = False + if ndigits is None: + return_int = True + ndigits = 0 + if hasattr(number, '__round__'): + return number.__round__(ndigits) + + exponent = Decimal('10') ** (-ndigits) + + # Work around issue #24: round() breaks on PyPy with NumPy's types + # Also breaks on CPython with NumPy's specialized int types like uint64 + if 'numpy' in repr(type(number)): + number = float(number) + + if isinstance(number, Decimal): + d = number + else: + if not PY26: + d = Decimal.from_float(number) + else: + d = from_float_26(number) + + if ndigits < 0: + result = newround(d / exponent) * exponent + else: + result = d.quantize(exponent, rounding=ROUND_HALF_EVEN) + + if return_int: + return int(result) + else: + return float(result) + + +### From Python 2.7's decimal.py. Only needed to support Py2.6: + +def from_float_26(f): + """Converts a float to a decimal number, exactly. + + Note that Decimal.from_float(0.1) is not the same as Decimal('0.1'). + Since 0.1 is not exactly representable in binary floating point, the + value is stored as the nearest representable value which is + 0x1.999999999999ap-4. The exact equivalent of the value in decimal + is 0.1000000000000000055511151231257827021181583404541015625. 
+ + >>> Decimal.from_float(0.1) + Decimal('0.1000000000000000055511151231257827021181583404541015625') + >>> Decimal.from_float(float('nan')) + Decimal('NaN') + >>> Decimal.from_float(float('inf')) + Decimal('Infinity') + >>> Decimal.from_float(-float('inf')) + Decimal('-Infinity') + >>> Decimal.from_float(-0.0) + Decimal('-0') + + """ + import math as _math + from decimal import _dec_from_triple # only available on Py2.6 and Py2.7 (not 3.3) + + if isinstance(f, (int, long)): # handle integer inputs + return Decimal(f) + if _math.isinf(f) or _math.isnan(f): # raises TypeError if not a float + return Decimal(repr(f)) + if _math.copysign(1.0, f) == 1.0: + sign = 0 + else: + sign = 1 + n, d = abs(f).as_integer_ratio() + # int.bit_length() method doesn't exist on Py2.6: + def bit_length(d): + if d != 0: + return len(bin(abs(d))) - 2 + else: + return 0 + k = bit_length(d) - 1 + result = _dec_from_triple(sign, str(n*5**k), -k) + return result + + +__all__ = ['newround'] diff --git a/future/builtins/backports/newsuper.py b/src/future/builtins/newsuper.py similarity index 57% rename from future/builtins/backports/newsuper.py rename to src/future/builtins/newsuper.py index a03bb9bc..3e8cc80f 100644 --- a/future/builtins/backports/newsuper.py +++ b/src/future/builtins/newsuper.py @@ -24,16 +24,18 @@ def append(self, item): "Of course, you can still explicitly pass in the arguments if you want to do something strange. Sometimes you really do want that, e.g. to skip over some classes in the method resolution order. - + "How does it work? By inspecting the calling frame to determine the function object being executed and the object on which it's being called, and then walking the object's __mro__ chain to find out where that function was defined. Yuck, but it seems to work..." 
''' +from __future__ import absolute_import import sys +from types import FunctionType -from future.utils import PY3 +from future.utils import PY3, PY26 _builtin_super = super @@ -49,46 +51,59 @@ def newsuper(typ=_SENTINEL, type_or_obj=_SENTINEL, framedepth=1): # Infer the correct call if used without arguments. if typ is _SENTINEL: # We'll need to do some frame hacking. - f = sys._getframe(framedepth) + f = sys._getframe(framedepth) try: # Get the function's first positional argument. type_or_obj = f.f_locals[f.f_code.co_varnames[0]] except (IndexError, KeyError,): raise RuntimeError('super() used in a function with no args') - + try: - # Get the MRO so we can crawl it. - mro = type_or_obj.__mro__ - except AttributeError: + typ = find_owner(type_or_obj, f.f_code) + except (AttributeError, RuntimeError, TypeError): + # see issues #160, #267 try: - mro = type_or_obj.__class__.__mro__ + typ = find_owner(type_or_obj.__class__, f.f_code) except AttributeError: - raise RuntimeError('super() used with a non-newstyle class') - - # A ``for...else`` block? Yes! It's odd, but useful. - # If unfamiliar with for...else, see: - # - # http://psung.blogspot.com/2007/12/for-else-in-python.html - for typ in mro: - # Find the class that owns the currently-executing method. - for meth in typ.__dict__.values(): - if not isinstance(meth, type(newsuper)): - continue - if meth.func_code is f.f_code: - break # Aha! Found you. - else: - continue # Not found! Move onto the next class in MRO. - break # Found! Break out of the search loop. - else: - raise RuntimeError('super() called outside a method') - + raise RuntimeError('super() used with an old-style class') + except TypeError: + raise RuntimeError('super() called outside a method') + # Dispatch to builtin super(). if type_or_obj is not _SENTINEL: return _builtin_super(typ, type_or_obj) return _builtin_super(typ) +def find_owner(cls, code): + '''Find the class that owns the currently-executing method. 
+ ''' + for typ in cls.__mro__: + for meth in typ.__dict__.values(): + # Drill down through any wrappers to the underlying func. + # This handles e.g. classmethod() and staticmethod(). + try: + while not isinstance(meth,FunctionType): + if isinstance(meth, property): + # Calling __get__ on the property will invoke + # user code which might throw exceptions or have + # side effects + meth = meth.fget + else: + try: + meth = meth.__func__ + except AttributeError: + meth = meth.__get__(cls, typ) + except (AttributeError, TypeError): + continue + if meth.func_code is code: + return typ # Aha! Found you. + # Not found! Move onto the next class in MRO. + + raise TypeError + + def superm(*args, **kwds): f = sys._getframe(1) nm = f.f_code.co_name @@ -96,4 +111,3 @@ def superm(*args, **kwds): __all__ = ['newsuper'] - diff --git a/src/future/moves/__init__.py b/src/future/moves/__init__.py new file mode 100644 index 00000000..0cd60d3d --- /dev/null +++ b/src/future/moves/__init__.py @@ -0,0 +1,8 @@ +# future.moves package +from __future__ import absolute_import +import sys +__future_module__ = True +from future.standard_library import import_top_level_modules + +if sys.version_info[0] >= 3: + import_top_level_modules() diff --git a/src/future/moves/_dummy_thread.py b/src/future/moves/_dummy_thread.py new file mode 100644 index 00000000..6633f42e --- /dev/null +++ b/src/future/moves/_dummy_thread.py @@ -0,0 +1,13 @@ +from __future__ import absolute_import +from future.utils import PY3, PY39_PLUS + + +if PY39_PLUS: + # _dummy_thread and dummy_threading modules were both deprecated in + # Python 3.7 and removed in Python 3.9 + from _thread import * +elif PY3: + from _dummy_thread import * +else: + __future_module__ = True + from dummy_thread import * diff --git a/src/future/moves/_markupbase.py b/src/future/moves/_markupbase.py new file mode 100644 index 00000000..f9fb4bbf --- /dev/null +++ b/src/future/moves/_markupbase.py @@ -0,0 +1,8 @@ +from __future__ import 
absolute_import +from future.utils import PY3 + +if PY3: + from _markupbase import * +else: + __future_module__ = True + from markupbase import * diff --git a/src/future/moves/_thread.py b/src/future/moves/_thread.py new file mode 100644 index 00000000..c68018bb --- /dev/null +++ b/src/future/moves/_thread.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from _thread import * +else: + __future_module__ = True + from thread import * diff --git a/src/future/moves/builtins.py b/src/future/moves/builtins.py new file mode 100644 index 00000000..e4b6221d --- /dev/null +++ b/src/future/moves/builtins.py @@ -0,0 +1,10 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from builtins import * +else: + __future_module__ = True + from __builtin__ import * + # Overwrite any old definitions with the equivalent future.builtins ones: + from future.builtins import * diff --git a/src/future/moves/collections.py b/src/future/moves/collections.py new file mode 100644 index 00000000..664ee6a3 --- /dev/null +++ b/src/future/moves/collections.py @@ -0,0 +1,18 @@ +from __future__ import absolute_import +import sys + +from future.utils import PY2, PY26 +__future_module__ = True + +from collections import * + +if PY2: + from UserDict import UserDict + from UserList import UserList + from UserString import UserString + +if PY26: + from future.backports.misc import OrderedDict, Counter + +if sys.version_info < (3, 3): + from future.backports.misc import ChainMap, _count_elements diff --git a/src/future/moves/configparser.py b/src/future/moves/configparser.py new file mode 100644 index 00000000..33d9cf95 --- /dev/null +++ b/src/future/moves/configparser.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import + +from future.utils import PY2 + +if PY2: + from ConfigParser import * +else: + from configparser import * diff --git a/src/future/moves/copyreg.py b/src/future/moves/copyreg.py new file mode 100644 
index 00000000..9d08cdc5 --- /dev/null +++ b/src/future/moves/copyreg.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + import copyreg, sys + # A "*" import uses Python 3's copyreg.__all__ which does not include + # all public names in the API surface for copyreg, this avoids that + # problem by just making our module _be_ a reference to the actual module. + sys.modules['future.moves.copyreg'] = copyreg +else: + __future_module__ = True + from copy_reg import * diff --git a/src/future/moves/dbm/__init__.py b/src/future/moves/dbm/__init__.py new file mode 100644 index 00000000..626b406f --- /dev/null +++ b/src/future/moves/dbm/__init__.py @@ -0,0 +1,20 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from dbm import * +else: + __future_module__ = True + from whichdb import * + from anydbm import * + +# Py3.3's dbm/__init__.py imports ndbm but doesn't expose it via __all__. +# In case some (badly written) code depends on dbm.ndbm after import dbm, +# we simulate this: +if PY3: + from dbm import ndbm +else: + try: + from future.moves.dbm import ndbm + except ImportError: + ndbm = None diff --git a/src/future/moves/dbm/dumb.py b/src/future/moves/dbm/dumb.py new file mode 100644 index 00000000..528383f6 --- /dev/null +++ b/src/future/moves/dbm/dumb.py @@ -0,0 +1,9 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from dbm.dumb import * +else: + __future_module__ = True + from dumbdbm import * diff --git a/src/future/moves/dbm/gnu.py b/src/future/moves/dbm/gnu.py new file mode 100644 index 00000000..68ccf67b --- /dev/null +++ b/src/future/moves/dbm/gnu.py @@ -0,0 +1,9 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from dbm.gnu import * +else: + __future_module__ = True + from gdbm import * diff --git a/src/future/moves/dbm/ndbm.py b/src/future/moves/dbm/ndbm.py new file mode 100644 index 
00000000..8c6fff8a --- /dev/null +++ b/src/future/moves/dbm/ndbm.py @@ -0,0 +1,9 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from dbm.ndbm import * +else: + __future_module__ = True + from dbm import * diff --git a/src/future/moves/html/__init__.py b/src/future/moves/html/__init__.py new file mode 100644 index 00000000..22ed6e7d --- /dev/null +++ b/src/future/moves/html/__init__.py @@ -0,0 +1,31 @@ +from __future__ import absolute_import +from future.utils import PY3 +__future_module__ = True + +if PY3: + from html import * +else: + # cgi.escape isn't good enough for the single Py3.3 html test to pass. + # Define it inline here instead. From the Py3.4 stdlib. Note that the + # html.escape() function from the Py3.3 stdlib is not suitable for use on + # Py2.x. + """ + General functions for HTML manipulation. + """ + + def escape(s, quote=True): + """ + Replace special characters "&", "<" and ">" to HTML-safe sequences. + If the optional flag quote is true (the default), the quotation mark + characters, both double quote (") and single quote (') characters are also + translated. + """ + s = s.replace("&", "&amp;") # Must be done first!
+ s = s.replace("<", "&lt;") + s = s.replace(">", "&gt;") + if quote: + s = s.replace('"', "&quot;") + s = s.replace('\'', "&#x27;") + return s + + __all__ = ['escape'] diff --git a/src/future/moves/html/entities.py b/src/future/moves/html/entities.py new file mode 100644 index 00000000..56a88609 --- /dev/null +++ b/src/future/moves/html/entities.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from html.entities import * +else: + __future_module__ = True + from htmlentitydefs import * diff --git a/src/future/moves/html/parser.py b/src/future/moves/html/parser.py new file mode 100644 index 00000000..a6115b59 --- /dev/null +++ b/src/future/moves/html/parser.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import +from future.utils import PY3 +__future_module__ = True + +if PY3: + from html.parser import * +else: + from HTMLParser import * diff --git a/src/future/moves/http/__init__.py b/src/future/moves/http/__init__.py new file mode 100644 index 00000000..917b3d71 --- /dev/null +++ b/src/future/moves/http/__init__.py @@ -0,0 +1,4 @@ +from future.utils import PY3 + +if not PY3: + __future_module__ = True diff --git a/src/future/moves/http/client.py b/src/future/moves/http/client.py new file mode 100644 index 00000000..55f9c9c1 --- /dev/null +++ b/src/future/moves/http/client.py @@ -0,0 +1,8 @@ +from future.utils import PY3 + +if PY3: + from http.client import * +else: + from httplib import * + from httplib import HTTPMessage + __future_module__ = True diff --git a/src/future/moves/http/cookiejar.py b/src/future/moves/http/cookiejar.py new file mode 100644 index 00000000..ea00df77 --- /dev/null +++ b/src/future/moves/http/cookiejar.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from http.cookiejar import * +else: + __future_module__ = True + from cookielib import * diff --git a/src/future/moves/http/cookies.py b/src/future/moves/http/cookies.py new file mode 100644 index
00000000..1b74fe2d --- /dev/null +++ b/src/future/moves/http/cookies.py @@ -0,0 +1,9 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from http.cookies import * +else: + __future_module__ = True + from Cookie import * + from Cookie import Morsel # left out of __all__ on Py2.7! diff --git a/src/future/moves/http/server.py b/src/future/moves/http/server.py new file mode 100644 index 00000000..4e75cc1d --- /dev/null +++ b/src/future/moves/http/server.py @@ -0,0 +1,20 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from http.server import * +else: + __future_module__ = True + from BaseHTTPServer import * + from CGIHTTPServer import * + from SimpleHTTPServer import * + try: + from CGIHTTPServer import _url_collapse_path # needed for a test + except ImportError: + try: + # Python 2.7.0 to 2.7.3 + from CGIHTTPServer import ( + _url_collapse_path_split as _url_collapse_path) + except ImportError: + # Doesn't exist on Python 2.6.x. Ignore it. 
+ pass diff --git a/src/future/moves/itertools.py b/src/future/moves/itertools.py new file mode 100644 index 00000000..e5eb20d5 --- /dev/null +++ b/src/future/moves/itertools.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import + +from itertools import * +try: + zip_longest = izip_longest + filterfalse = ifilterfalse +except NameError: + pass diff --git a/src/future/moves/multiprocessing.py b/src/future/moves/multiprocessing.py new file mode 100644 index 00000000..a871b676 --- /dev/null +++ b/src/future/moves/multiprocessing.py @@ -0,0 +1,7 @@ +from __future__ import absolute_import +from future.utils import PY3 + +from multiprocessing import * +if not PY3: + __future_module__ = True + from multiprocessing.queues import SimpleQueue diff --git a/src/future/moves/pickle.py b/src/future/moves/pickle.py new file mode 100644 index 00000000..c53d6939 --- /dev/null +++ b/src/future/moves/pickle.py @@ -0,0 +1,11 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from pickle import * +else: + __future_module__ = True + try: + from cPickle import * + except ImportError: + from pickle import * diff --git a/src/future/moves/queue.py b/src/future/moves/queue.py new file mode 100644 index 00000000..1cb1437d --- /dev/null +++ b/src/future/moves/queue.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from queue import * +else: + __future_module__ = True + from Queue import * diff --git a/src/future/moves/reprlib.py b/src/future/moves/reprlib.py new file mode 100644 index 00000000..a313a13a --- /dev/null +++ b/src/future/moves/reprlib.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from reprlib import * +else: + __future_module__ = True + from repr import * diff --git a/src/future/moves/socketserver.py b/src/future/moves/socketserver.py new file mode 100644 index 00000000..062e0848 --- /dev/null +++ b/src/future/moves/socketserver.py @@ 
-0,0 +1,8 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from socketserver import * +else: + __future_module__ = True + from SocketServer import * diff --git a/src/future/moves/subprocess.py b/src/future/moves/subprocess.py new file mode 100644 index 00000000..43ffd2ac --- /dev/null +++ b/src/future/moves/subprocess.py @@ -0,0 +1,11 @@ +from __future__ import absolute_import +from future.utils import PY2, PY26 + +from subprocess import * + +if PY2: + __future_module__ = True + from commands import getoutput, getstatusoutput + +if PY26: + from future.backports.misc import check_output diff --git a/src/future/moves/sys.py b/src/future/moves/sys.py new file mode 100644 index 00000000..1293bcb0 --- /dev/null +++ b/src/future/moves/sys.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import + +from future.utils import PY2 + +from sys import * + +if PY2: + from __builtin__ import intern diff --git a/src/future/moves/test/__init__.py b/src/future/moves/test/__init__.py new file mode 100644 index 00000000..5cf428b6 --- /dev/null +++ b/src/future/moves/test/__init__.py @@ -0,0 +1,5 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if not PY3: + __future_module__ = True diff --git a/src/future/moves/test/support.py b/src/future/moves/test/support.py new file mode 100644 index 00000000..f70c9d7d --- /dev/null +++ b/src/future/moves/test/support.py @@ -0,0 +1,19 @@ +from __future__ import absolute_import + +import sys + +from future.standard_library import suspend_hooks +from future.utils import PY3 + +if PY3: + from test.support import * + if sys.version_info[:2] >= (3, 10): + from test.support.os_helper import ( + EnvironmentVarGuard, + TESTFN, + ) + from test.support.warnings_helper import check_warnings +else: + __future_module__ = True + with suspend_hooks(): + from test.test_support import * diff --git a/src/future/moves/tkinter/__init__.py b/src/future/moves/tkinter/__init__.py new file mode 100644 
index 00000000..e4082966 --- /dev/null +++ b/src/future/moves/tkinter/__init__.py @@ -0,0 +1,27 @@ +from __future__ import absolute_import +from future.utils import PY3 +__future_module__ = True + +if not PY3: + from Tkinter import * + from Tkinter import (_cnfmerge, _default_root, _flatten, + _support_default_root, _test, + _tkinter, _setit) + + try: # >= 2.7.4 + from Tkinter import (_join) + except ImportError: + pass + + try: # >= 2.7.4 + from Tkinter import (_stringify) + except ImportError: + pass + + try: # >= 2.7.9 + from Tkinter import (_splitdict) + except ImportError: + pass + +else: + from tkinter import * diff --git a/src/future/moves/tkinter/colorchooser.py b/src/future/moves/tkinter/colorchooser.py new file mode 100644 index 00000000..6dde6e8d --- /dev/null +++ b/src/future/moves/tkinter/colorchooser.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.colorchooser import * +else: + try: + from tkColorChooser import * + except ImportError: + raise ImportError('The tkColorChooser module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/src/future/moves/tkinter/commondialog.py b/src/future/moves/tkinter/commondialog.py new file mode 100644 index 00000000..eb7ae8d6 --- /dev/null +++ b/src/future/moves/tkinter/commondialog.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.commondialog import * +else: + try: + from tkCommonDialog import * + except ImportError: + raise ImportError('The tkCommonDialog module is missing. 
Does your Py2 ' + 'installation include tkinter?') diff --git a/src/future/moves/tkinter/constants.py b/src/future/moves/tkinter/constants.py new file mode 100644 index 00000000..ffe09815 --- /dev/null +++ b/src/future/moves/tkinter/constants.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.constants import * +else: + try: + from Tkconstants import * + except ImportError: + raise ImportError('The Tkconstants module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/src/future/moves/tkinter/dialog.py b/src/future/moves/tkinter/dialog.py new file mode 100644 index 00000000..113370ca --- /dev/null +++ b/src/future/moves/tkinter/dialog.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.dialog import * +else: + try: + from Dialog import * + except ImportError: + raise ImportError('The Dialog module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/src/future/moves/tkinter/dnd.py b/src/future/moves/tkinter/dnd.py new file mode 100644 index 00000000..1ab43791 --- /dev/null +++ b/src/future/moves/tkinter/dnd.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.dnd import * +else: + try: + from Tkdnd import * + except ImportError: + raise ImportError('The Tkdnd module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/src/future/moves/tkinter/filedialog.py b/src/future/moves/tkinter/filedialog.py new file mode 100644 index 00000000..6a6f03ca --- /dev/null +++ b/src/future/moves/tkinter/filedialog.py @@ -0,0 +1,18 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.filedialog import * +else: + try: + from FileDialog import * + except ImportError: + raise ImportError('The FileDialog module is missing. 
Does your Py2 ' + 'installation include tkinter?') + + try: + from tkFileDialog import * + except ImportError: + raise ImportError('The tkFileDialog module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/src/future/moves/tkinter/font.py b/src/future/moves/tkinter/font.py new file mode 100644 index 00000000..628f399a --- /dev/null +++ b/src/future/moves/tkinter/font.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.font import * +else: + try: + from tkFont import * + except ImportError: + raise ImportError('The tkFont module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/src/future/moves/tkinter/messagebox.py b/src/future/moves/tkinter/messagebox.py new file mode 100644 index 00000000..b43d8702 --- /dev/null +++ b/src/future/moves/tkinter/messagebox.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.messagebox import * +else: + try: + from tkMessageBox import * + except ImportError: + raise ImportError('The tkMessageBox module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/src/future/moves/tkinter/scrolledtext.py b/src/future/moves/tkinter/scrolledtext.py new file mode 100644 index 00000000..1c69db60 --- /dev/null +++ b/src/future/moves/tkinter/scrolledtext.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.scrolledtext import * +else: + try: + from ScrolledText import * + except ImportError: + raise ImportError('The ScrolledText module is missing. 
Does your Py2 ' + 'installation include tkinter?') diff --git a/src/future/moves/tkinter/simpledialog.py b/src/future/moves/tkinter/simpledialog.py new file mode 100644 index 00000000..dba93fbf --- /dev/null +++ b/src/future/moves/tkinter/simpledialog.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.simpledialog import * +else: + try: + from SimpleDialog import * + except ImportError: + raise ImportError('The SimpleDialog module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/src/future/moves/tkinter/tix.py b/src/future/moves/tkinter/tix.py new file mode 100644 index 00000000..8d1718ad --- /dev/null +++ b/src/future/moves/tkinter/tix.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.tix import * +else: + try: + from Tix import * + except ImportError: + raise ImportError('The Tix module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/src/future/moves/tkinter/ttk.py b/src/future/moves/tkinter/ttk.py new file mode 100644 index 00000000..081c1b49 --- /dev/null +++ b/src/future/moves/tkinter/ttk.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.ttk import * +else: + try: + from ttk import * + except ImportError: + raise ImportError('The ttk module is missing. 
Does your Py2 ' + 'installation include tkinter?') diff --git a/src/future/moves/urllib/__init__.py b/src/future/moves/urllib/__init__.py new file mode 100644 index 00000000..5cf428b6 --- /dev/null +++ b/src/future/moves/urllib/__init__.py @@ -0,0 +1,5 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if not PY3: + __future_module__ = True diff --git a/src/future/moves/urllib/error.py b/src/future/moves/urllib/error.py new file mode 100644 index 00000000..7d8ada73 --- /dev/null +++ b/src/future/moves/urllib/error.py @@ -0,0 +1,16 @@ +from __future__ import absolute_import +from future.standard_library import suspend_hooks + +from future.utils import PY3 + +if PY3: + from urllib.error import * +else: + __future_module__ = True + + # We use this method to get at the original Py2 urllib before any renaming magic + # ContentTooShortError = sys.py2_modules['urllib'].ContentTooShortError + + with suspend_hooks(): + from urllib import ContentTooShortError + from urllib2 import URLError, HTTPError diff --git a/src/future/moves/urllib/parse.py b/src/future/moves/urllib/parse.py new file mode 100644 index 00000000..9074b816 --- /dev/null +++ b/src/future/moves/urllib/parse.py @@ -0,0 +1,28 @@ +from __future__ import absolute_import +from future.standard_library import suspend_hooks + +from future.utils import PY3 + +if PY3: + from urllib.parse import * +else: + __future_module__ = True + from urlparse import (ParseResult, SplitResult, parse_qs, parse_qsl, + urldefrag, urljoin, urlparse, urlsplit, + urlunparse, urlunsplit) + + # we use this method to get at the original py2 urllib before any renaming + # quote = sys.py2_modules['urllib'].quote + # quote_plus = sys.py2_modules['urllib'].quote_plus + # unquote = sys.py2_modules['urllib'].unquote + # unquote_plus = sys.py2_modules['urllib'].unquote_plus + # urlencode = sys.py2_modules['urllib'].urlencode + # splitquery = sys.py2_modules['urllib'].splitquery + + with suspend_hooks(): + from urllib import 
(quote, + quote_plus, + unquote, + unquote_plus, + urlencode, + splitquery) diff --git a/src/future/moves/urllib/request.py b/src/future/moves/urllib/request.py new file mode 100644 index 00000000..972aa4ab --- /dev/null +++ b/src/future/moves/urllib/request.py @@ -0,0 +1,94 @@ +from __future__ import absolute_import + +from future.standard_library import suspend_hooks +from future.utils import PY3 + +if PY3: + from urllib.request import * + # This aren't in __all__: + from urllib.request import (getproxies, + pathname2url, + proxy_bypass, + quote, + request_host, + thishost, + unquote, + url2pathname, + urlcleanup, + urljoin, + urlopen, + urlparse, + urlretrieve, + urlsplit, + urlunparse) + + from urllib.parse import (splitattr, + splithost, + splitpasswd, + splitport, + splitquery, + splittag, + splittype, + splituser, + splitvalue, + to_bytes, + unwrap) +else: + __future_module__ = True + with suspend_hooks(): + from urllib import * + from urllib2 import * + from urlparse import * + + # Rename: + from urllib import toBytes # missing from __all__ on Py2.6 + to_bytes = toBytes + + # from urllib import (pathname2url, + # url2pathname, + # getproxies, + # urlretrieve, + # urlcleanup, + # URLopener, + # FancyURLopener, + # proxy_bypass) + + # from urllib2 import ( + # AbstractBasicAuthHandler, + # AbstractDigestAuthHandler, + # BaseHandler, + # CacheFTPHandler, + # FileHandler, + # FTPHandler, + # HTTPBasicAuthHandler, + # HTTPCookieProcessor, + # HTTPDefaultErrorHandler, + # HTTPDigestAuthHandler, + # HTTPErrorProcessor, + # HTTPHandler, + # HTTPPasswordMgr, + # HTTPPasswordMgrWithDefaultRealm, + # HTTPRedirectHandler, + # HTTPSHandler, + # URLError, + # build_opener, + # install_opener, + # OpenerDirector, + # ProxyBasicAuthHandler, + # ProxyDigestAuthHandler, + # ProxyHandler, + # Request, + # UnknownHandler, + # urlopen, + # ) + + # from urlparse import ( + # urldefrag + # urljoin, + # urlparse, + # urlunparse, + # urlsplit, + # urlunsplit, + # parse_qs, + # 
parse_qsl + # ) diff --git a/src/future/moves/urllib/response.py b/src/future/moves/urllib/response.py new file mode 100644 index 00000000..a287ae28 --- /dev/null +++ b/src/future/moves/urllib/response.py @@ -0,0 +1,12 @@ +from future import standard_library +from future.utils import PY3 + +if PY3: + from urllib.response import * +else: + __future_module__ = True + with standard_library.suspend_hooks(): + from urllib import (addbase, + addclosehook, + addinfo, + addinfourl) diff --git a/src/future/moves/urllib/robotparser.py b/src/future/moves/urllib/robotparser.py new file mode 100644 index 00000000..0dc8f571 --- /dev/null +++ b/src/future/moves/urllib/robotparser.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from urllib.robotparser import * +else: + __future_module__ = True + from robotparser import * diff --git a/src/future/moves/winreg.py b/src/future/moves/winreg.py new file mode 100644 index 00000000..c8b14756 --- /dev/null +++ b/src/future/moves/winreg.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from winreg import * +else: + __future_module__ = True + from _winreg import * diff --git a/src/future/moves/xmlrpc/__init__.py b/src/future/moves/xmlrpc/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/future/moves/xmlrpc/client.py b/src/future/moves/xmlrpc/client.py new file mode 100644 index 00000000..4708cf89 --- /dev/null +++ b/src/future/moves/xmlrpc/client.py @@ -0,0 +1,7 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from xmlrpc.client import * +else: + from xmlrpclib import * diff --git a/src/future/moves/xmlrpc/server.py b/src/future/moves/xmlrpc/server.py new file mode 100644 index 00000000..1a8af345 --- /dev/null +++ b/src/future/moves/xmlrpc/server.py @@ -0,0 +1,7 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from xmlrpc.server import *
+else: + from xmlrpclib import * diff --git a/src/future/standard_library/__init__.py b/src/future/standard_library/__init__.py new file mode 100644 index 00000000..d467aaf4 --- /dev/null +++ b/src/future/standard_library/__init__.py @@ -0,0 +1,821 @@ +""" +Python 3 reorganized the standard library (PEP 3108). This module exposes +several standard library modules to Python 2 under their new Python 3 +names. + +It is designed to be used as follows:: + + from future import standard_library + standard_library.install_aliases() + +And then these normal Py3 imports work on both Py3 and Py2:: + + import builtins + import copyreg + import queue + import reprlib + import socketserver + import winreg # on Windows only + import test.support + import html, html.parser, html.entities + import http, http.client, http.server + import http.cookies, http.cookiejar + import urllib.parse, urllib.request, urllib.response, urllib.error, urllib.robotparser + import xmlrpc.client, xmlrpc.server + + import _thread + import _dummy_thread + import _markupbase + + from itertools import filterfalse, zip_longest + from sys import intern + from collections import UserDict, UserList, UserString + from collections import OrderedDict, Counter, ChainMap # even on Py2.6 + from subprocess import getoutput, getstatusoutput + from subprocess import check_output # even on Py2.6 + from multiprocessing import SimpleQueue + +(The renamed modules and functions are still available under their old +names on Python 2.) 
+ +This is a cleaner alternative to this idiom (see +http://docs.pythonsprints.com/python3_porting/py-porting.html):: + + try: + import queue + except ImportError: + import Queue as queue + + +Limitations +----------- +We don't currently support these modules, but would like to:: + + import dbm + import dbm.dumb + import dbm.gnu + import collections.abc # on Py33 + import pickle # should (optionally) bring in cPickle on Python 2 + +""" + +from __future__ import absolute_import, division, print_function + +import sys +import logging +# imp was deprecated in python 3.6 +if sys.version_info >= (3, 6): + import importlib as imp +else: + import imp +import contextlib +import copy +import os + +# Make a dedicated logger; leave the root logger to be configured +# by the application. +flog = logging.getLogger('future_stdlib') +_formatter = logging.Formatter(logging.BASIC_FORMAT) +_handler = logging.StreamHandler() +_handler.setFormatter(_formatter) +flog.addHandler(_handler) +flog.setLevel(logging.WARN) + +from future.utils import PY2, PY3 + +# The modules that are defined under the same names on Py3 but with +# different contents in a significant way (e.g. 
submodules) are: +# pickle (fast one) +# dbm +# urllib +# test +# email + +REPLACED_MODULES = set(['test', 'urllib', 'pickle', 'dbm']) # add email and dbm when we support it + +# The following module names are not present in Python 2.x, so they cause no +# potential clashes between the old and new names: +# http +# html +# tkinter +# xmlrpc +# Keys: Py2 / real module names +# Values: Py3 / simulated module names +RENAMES = { + # 'cStringIO': 'io', # there's a new io module in Python 2.6 + # that provides StringIO and BytesIO + # 'StringIO': 'io', # ditto + # 'cPickle': 'pickle', + '__builtin__': 'builtins', + 'copy_reg': 'copyreg', + 'Queue': 'queue', + 'future.moves.socketserver': 'socketserver', + 'ConfigParser': 'configparser', + 'repr': 'reprlib', + 'multiprocessing.queues': 'multiprocessing', + # 'FileDialog': 'tkinter.filedialog', + # 'tkFileDialog': 'tkinter.filedialog', + # 'SimpleDialog': 'tkinter.simpledialog', + # 'tkSimpleDialog': 'tkinter.simpledialog', + # 'tkColorChooser': 'tkinter.colorchooser', + # 'tkCommonDialog': 'tkinter.commondialog', + # 'Dialog': 'tkinter.dialog', + # 'Tkdnd': 'tkinter.dnd', + # 'tkFont': 'tkinter.font', + # 'tkMessageBox': 'tkinter.messagebox', + # 'ScrolledText': 'tkinter.scrolledtext', + # 'Tkconstants': 'tkinter.constants', + # 'Tix': 'tkinter.tix', + # 'ttk': 'tkinter.ttk', + # 'Tkinter': 'tkinter', + '_winreg': 'winreg', + 'thread': '_thread', + 'dummy_thread': '_dummy_thread' if sys.version_info < (3, 9) else '_thread', + # 'anydbm': 'dbm', # causes infinite import loop + # 'whichdb': 'dbm', # causes infinite import loop + # anydbm and whichdb are handled by fix_imports2 + # 'dbhash': 'dbm.bsd', + # 'dumbdbm': 'dbm.dumb', + # 'dbm': 'dbm.ndbm', + # 'gdbm': 'dbm.gnu', + 'future.moves.xmlrpc': 'xmlrpc', + # 'future.backports.email': 'email', # for use by urllib + # 'DocXMLRPCServer': 'xmlrpc.server', + # 'SimpleXMLRPCServer': 'xmlrpc.server', + # 'httplib': 'http.client', + # 'htmlentitydefs' : 'html.entities', + # 
'HTMLParser' : 'html.parser', + # 'Cookie': 'http.cookies', + # 'cookielib': 'http.cookiejar', + # 'BaseHTTPServer': 'http.server', + # 'SimpleHTTPServer': 'http.server', + # 'CGIHTTPServer': 'http.server', + # 'future.backports.test': 'test', # primarily for renaming test_support to support + # 'commands': 'subprocess', + # 'urlparse' : 'urllib.parse', + # 'robotparser' : 'urllib.robotparser', + # 'abc': 'collections.abc', # for Py33 + # 'future.utils.six.moves.html': 'html', + # 'future.utils.six.moves.http': 'http', + 'future.moves.html': 'html', + 'future.moves.http': 'http', + # 'future.backports.urllib': 'urllib', + # 'future.utils.six.moves.urllib': 'urllib', + 'future.moves._markupbase': '_markupbase', + } + + +# It is complicated and apparently brittle to mess around with the +# ``sys.modules`` cache in order to support "import urllib" meaning two +# different things (Py2.7 urllib and backported Py3.3-like urllib) in different +# contexts. So we require explicit imports for these modules. +assert len(set(RENAMES.values()) & set(REPLACED_MODULES)) == 0 + + +# Harmless renames that we can insert. 
+# These modules need names from elsewhere being added to them: +# subprocess: should provide getoutput and other fns from commands +# module but these fns are missing: getstatus, mk2arg, +# mkarg +# re: needs an ASCII constant that works compatibly with Py3 + +# etc: see lib2to3/fixes/fix_imports.py + +# (New module name, new object name, old module name, old object name) +MOVES = [('collections', 'UserList', 'UserList', 'UserList'), + ('collections', 'UserDict', 'UserDict', 'UserDict'), + ('collections', 'UserString','UserString', 'UserString'), + ('collections', 'ChainMap', 'future.backports.misc', 'ChainMap'), + ('itertools', 'filterfalse','itertools', 'ifilterfalse'), + ('itertools', 'zip_longest','itertools', 'izip_longest'), + ('sys', 'intern','__builtin__', 'intern'), + ('multiprocessing', 'SimpleQueue', 'multiprocessing.queues', 'SimpleQueue'), + # The re module has no ASCII flag in Py2, but this is the default. + # Set re.ASCII to a zero constant. stat.ST_MODE just happens to be one + # (and it exists on Py2.6+). + ('re', 'ASCII','stat', 'ST_MODE'), + ('base64', 'encodebytes','base64', 'encodestring'), + ('base64', 'decodebytes','base64', 'decodestring'), + ('subprocess', 'getoutput', 'commands', 'getoutput'), + ('subprocess', 'getstatusoutput', 'commands', 'getstatusoutput'), + ('subprocess', 'check_output', 'future.backports.misc', 'check_output'), + ('math', 'ceil', 'future.backports.misc', 'ceil'), + ('collections', 'OrderedDict', 'future.backports.misc', 'OrderedDict'), + ('collections', 'Counter', 'future.backports.misc', 'Counter'), + ('collections', 'ChainMap', 'future.backports.misc', 'ChainMap'), + ('itertools', 'count', 'future.backports.misc', 'count'), + ('reprlib', 'recursive_repr', 'future.backports.misc', 'recursive_repr'), + ('functools', 'cmp_to_key', 'future.backports.misc', 'cmp_to_key'), + +# This is no use, since "import urllib.request" etc. 
still fails: +# ('urllib', 'error', 'future.moves.urllib', 'error'), +# ('urllib', 'parse', 'future.moves.urllib', 'parse'), +# ('urllib', 'request', 'future.moves.urllib', 'request'), +# ('urllib', 'response', 'future.moves.urllib', 'response'), +# ('urllib', 'robotparser', 'future.moves.urllib', 'robotparser'), + ] + + +# A minimal example of an import hook: +# class WarnOnImport(object): +# def __init__(self, *args): +# self.module_names = args +# +# def find_module(self, fullname, path=None): +# if fullname in self.module_names: +# self.path = path +# return self +# return None +# +# def load_module(self, name): +# if name in sys.modules: +# return sys.modules[name] +# module_info = imp.find_module(name, self.path) +# module = imp.load_module(name, *module_info) +# sys.modules[name] = module +# flog.warning("Imported deprecated module %s", name) +# return module + + +class RenameImport(object): + """ + A class for import hooks mapping Py3 module names etc. to the Py2 equivalents. + """ + # Different RenameImport classes are created when importing this module from + # different source files. This causes isinstance(hook, RenameImport) checks + # to produce inconsistent results. We add this RENAMER attribute here so + # remove_hooks() and install_hooks() can find instances of these classes + # easily: + RENAMER = True + + def __init__(self, old_to_new): + ''' + Pass in a dictionary-like object mapping from old names to new + names. E.g. 
{'ConfigParser': 'configparser', 'cPickle': 'pickle'} + ''' + self.old_to_new = old_to_new + both = set(old_to_new.keys()) & set(old_to_new.values()) + assert (len(both) == 0 and + len(set(old_to_new.values())) == len(old_to_new.values())), \ + 'Ambiguity in renaming (handler not implemented)' + self.new_to_old = dict((new, old) for (old, new) in old_to_new.items()) + + def find_module(self, fullname, path=None): + # Handles hierarchical importing: package.module.module2 + new_base_names = set([s.split('.')[0] for s in self.new_to_old]) + # Before v0.12: Was: if fullname in set(self.old_to_new) | new_base_names: + if fullname in new_base_names: + return self + return None + + def load_module(self, name): + path = None + if name in sys.modules: + return sys.modules[name] + elif name in self.new_to_old: + # New name. Look up the corresponding old (Py2) name: + oldname = self.new_to_old[name] + module = self._find_and_load_module(oldname) + # module.__future_module__ = True + else: + module = self._find_and_load_module(name) + # In any case, make it available under the requested (Py3) name + sys.modules[name] = module + return module + + def _find_and_load_module(self, name, path=None): + """ + Finds and loads it. But if there's a . in the name, handles it + properly. + """ + bits = name.split('.') + while len(bits) > 1: + # Treat the first bit as a package + packagename = bits.pop(0) + package = self._find_and_load_module(packagename, path) + try: + path = package.__path__ + except AttributeError: + # This could be e.g. moves. + flog.debug('Package {0} has no __path__.'.format(package)) + if name in sys.modules: + return sys.modules[name] + flog.debug('What to do here?') + + name = bits[0] + module_info = imp.find_module(name, path) + return imp.load_module(name, *module_info) + + +class hooks(object): + """ + Acts as a context manager. Saves the state of sys.modules and restores it + after the 'with' block. 
+ + Use like this: + + >>> from future import standard_library + >>> with standard_library.hooks(): + ... import http.client + >>> import requests + + For this to work, http.client will be scrubbed from sys.modules after the + 'with' block. That way the modules imported in the 'with' block will + continue to be accessible in the current namespace but not from any + imported modules (like requests). + """ + def __enter__(self): + # flog.debug('Entering hooks context manager') + self.old_sys_modules = copy.copy(sys.modules) + self.hooks_were_installed = detect_hooks() + # self.scrubbed = scrub_py2_sys_modules() + install_hooks() + return self + + def __exit__(self, *args): + # flog.debug('Exiting hooks context manager') + # restore_sys_modules(self.scrubbed) + if not self.hooks_were_installed: + remove_hooks() + # scrub_future_sys_modules() + +# Sanity check for is_py2_stdlib_module(): We aren't replacing any +# builtin modules names: +if PY2: + assert len(set(RENAMES.values()) & set(sys.builtin_module_names)) == 0 + + +def is_py2_stdlib_module(m): + """ + Tries to infer whether the module m is from the Python 2 standard library. + This may not be reliable on all systems. + """ + if PY3: + return False + if not 'stdlib_path' in is_py2_stdlib_module.__dict__: + stdlib_files = [contextlib.__file__, os.__file__, copy.__file__] + stdlib_paths = [os.path.split(f)[0] for f in stdlib_files] + if not len(set(stdlib_paths)) == 1: + # This seems to happen on travis-ci.org. Very strange. We'll try to + # ignore it. 
+ flog.warn('Multiple locations found for the Python standard ' + 'library: %s' % stdlib_paths) + # Choose the first one arbitrarily + is_py2_stdlib_module.stdlib_path = stdlib_paths[0] + + if m.__name__ in sys.builtin_module_names: + return True + + if hasattr(m, '__file__'): + modpath = os.path.split(m.__file__) + if (modpath[0].startswith(is_py2_stdlib_module.stdlib_path) and + 'site-packages' not in modpath[0]): + return True + + return False + + +def scrub_py2_sys_modules(): + """ + Removes any Python 2 standard library modules from ``sys.modules`` that + would interfere with Py3-style imports using import hooks. Examples are + modules with the same names (like urllib or email). + + (Note that currently import hooks are disabled for modules like these + with ambiguous names anyway ...) + """ + if PY3: + return {} + scrubbed = {} + for modulename in REPLACED_MODULES & set(RENAMES.keys()): + if not modulename in sys.modules: + continue + + module = sys.modules[modulename] + + if is_py2_stdlib_module(module): + flog.debug('Deleting (Py2) {} from sys.modules'.format(modulename)) + scrubbed[modulename] = sys.modules[modulename] + del sys.modules[modulename] + return scrubbed + + +def scrub_future_sys_modules(): + """ + Deprecated. + """ + return {} + +class suspend_hooks(object): + """ + Acts as a context manager. Use like this: + + >>> from future import standard_library + >>> standard_library.install_hooks() + >>> import http.client + >>> # ... + >>> with standard_library.suspend_hooks(): + >>> import requests # incompatible with ``future``'s standard library hooks + + If the hooks were disabled before the context, they are not installed when + the context is left. 
+ """ + def __enter__(self): + self.hooks_were_installed = detect_hooks() + remove_hooks() + # self.scrubbed = scrub_future_sys_modules() + return self + + def __exit__(self, *args): + if self.hooks_were_installed: + install_hooks() + # restore_sys_modules(self.scrubbed) + + +def restore_sys_modules(scrubbed): + """ + Add any previously scrubbed modules back to the sys.modules cache, + but only if it's safe to do so. + """ + clash = set(sys.modules) & set(scrubbed) + if len(clash) != 0: + # If several, choose one arbitrarily to raise an exception about + first = list(clash)[0] + raise ImportError('future module {} clashes with Py2 module' + .format(first)) + sys.modules.update(scrubbed) + + +def install_aliases(): + """ + Monkey-patches the standard library in Py2.6/7 to provide + aliases for better Py3 compatibility. + """ + if PY3: + return + # if hasattr(install_aliases, 'run_already'): + # return + for (newmodname, newobjname, oldmodname, oldobjname) in MOVES: + __import__(newmodname) + # We look up the module in sys.modules because __import__ just returns the + # top-level package: + newmod = sys.modules[newmodname] + # newmod.__future_module__ = True + + __import__(oldmodname) + oldmod = sys.modules[oldmodname] + + obj = getattr(oldmod, oldobjname) + setattr(newmod, newobjname, obj) + + # Hack for urllib so it appears to have the same structure on Py2 as on Py3 + import urllib + from future.backports.urllib import request + from future.backports.urllib import response + from future.backports.urllib import parse + from future.backports.urllib import error + from future.backports.urllib import robotparser + urllib.request = request + urllib.response = response + urllib.parse = parse + urllib.error = error + urllib.robotparser = robotparser + sys.modules['urllib.request'] = request + sys.modules['urllib.response'] = response + sys.modules['urllib.parse'] = parse + sys.modules['urllib.error'] = error + sys.modules['urllib.robotparser'] = robotparser + + # Patch 
the test module so it appears to have the same structure on Py2 as on Py3 + try: + import test + except ImportError: + pass + try: + from future.moves.test import support + except ImportError: + pass + else: + test.support = support + sys.modules['test.support'] = support + + # Patch the dbm module so it appears to have the same structure on Py2 as on Py3 + try: + import dbm + except ImportError: + pass + else: + from future.moves.dbm import dumb + dbm.dumb = dumb + sys.modules['dbm.dumb'] = dumb + try: + from future.moves.dbm import gnu + except ImportError: + pass + else: + dbm.gnu = gnu + sys.modules['dbm.gnu'] = gnu + try: + from future.moves.dbm import ndbm + except ImportError: + pass + else: + dbm.ndbm = ndbm + sys.modules['dbm.ndbm'] = ndbm + + # install_aliases.run_already = True + + +def install_hooks(): + """ + This function installs the future.standard_library import hook into + sys.meta_path. + """ + if PY3: + return + + install_aliases() + + flog.debug('sys.meta_path was: {0}'.format(sys.meta_path)) + flog.debug('Installing hooks ...') + + # Add it unless it's there already + newhook = RenameImport(RENAMES) + if not detect_hooks(): + sys.meta_path.append(newhook) + flog.debug('sys.meta_path is now: {0}'.format(sys.meta_path)) + + +def enable_hooks(): + """ + Deprecated. Use install_hooks() instead. This will be removed by + ``future`` v1.0. + """ + install_hooks() + + +def remove_hooks(scrub_sys_modules=False): + """ + This function removes the import hook from sys.meta_path. + """ + if PY3: + return + flog.debug('Uninstalling hooks ...') + # Loop backwards, so deleting items keeps the ordering: + for i, hook in list(enumerate(sys.meta_path))[::-1]: + if hasattr(hook, 'RENAMER'): + del sys.meta_path[i] + + # Explicit is better than implicit. In the future the interface should + # probably change so that scrubbing the import hooks requires a separate + # function call. Left as is for now for backward compatibility with + # v0.11.x. 
+ if scrub_sys_modules: + scrub_future_sys_modules() + + +def disable_hooks(): + """ + Deprecated. Use remove_hooks() instead. This will be removed by + ``future`` v1.0. + """ + remove_hooks() + + +def detect_hooks(): + """ + Returns True if the import hooks are installed, False if not. + """ + flog.debug('Detecting hooks ...') + present = any([hasattr(hook, 'RENAMER') for hook in sys.meta_path]) + if present: + flog.debug('Detected.') + else: + flog.debug('Not detected.') + return present + + +# As of v0.12, this no longer happens implicitly: +# if not PY3: +# install_hooks() + + +if not hasattr(sys, 'py2_modules'): + sys.py2_modules = {} + +def cache_py2_modules(): + """ + Currently this function is unneeded, as we are not attempting to provide import hooks + for modules with ambiguous names: email, urllib, pickle. + """ + if len(sys.py2_modules) != 0: + return + assert not detect_hooks() + import urllib + sys.py2_modules['urllib'] = urllib + + import email + sys.py2_modules['email'] = email + + import pickle + sys.py2_modules['pickle'] = pickle + + # Not all Python installations have test module. (Anaconda doesn't, for example.) + # try: + # import test + # except ImportError: + # sys.py2_modules['test'] = None + # sys.py2_modules['test'] = test + + # import dbm + # sys.py2_modules['dbm'] = dbm + + +def import_(module_name, backport=False): + """ + Pass a (potentially dotted) module name of a Python 3 standard library + module. This function imports the module compatibly on Py2 and Py3 and + returns the top-level module. + + Example use: + >>> http = import_('http.client') + >>> http = import_('http.server') + >>> urllib = import_('urllib.request') + + Then: + >>> conn = http.client.HTTPConnection(...) + >>> response = urllib.request.urlopen('http://mywebsite.com') + >>> # etc. 
+ + Use as follows: + >>> package_name = import_(module_name) + + On Py3, equivalent to this: + + >>> import module_name + + On Py2, equivalent to this if backport=False: + + >>> from future.moves import module_name + + or to this if backport=True: + + >>> from future.backports import module_name + + except that it also handles dotted module names such as ``http.client`` + The effect then is like this: + + >>> from future.backports import module + >>> from future.backports.module import submodule + >>> module.submodule = submodule + + Note that this would be a SyntaxError in Python: + + >>> from future.backports import http.client + + """ + # Python 2.6 doesn't have importlib in the stdlib, so it requires + # the backported ``importlib`` package from PyPI as a dependency to use + # this function: + import importlib + + if PY3: + return __import__(module_name) + else: + # client.blah = blah + # Then http.client = client + # etc. + if backport: + prefix = 'future.backports' + else: + prefix = 'future.moves' + parts = prefix.split('.') + module_name.split('.') + + modules = [] + for i, part in enumerate(parts): + sofar = '.'.join(parts[:i+1]) + modules.append(importlib.import_module(sofar)) + for i, part in reversed(list(enumerate(parts))): + if i == 0: + break + setattr(modules[i-1], part, modules[i]) + + # Return the next-most top-level module after future.backports / future.moves: + return modules[2] + + +def from_import(module_name, *symbol_names, **kwargs): + """ + Example use: + >>> HTTPConnection = from_import('http.client', 'HTTPConnection') + >>> HTTPServer = from_import('http.server', 'HTTPServer') + >>> urlopen, urlparse = from_import('urllib.request', 'urlopen', 'urlparse') + + Equivalent to this on Py3: + + >>> from module_name import symbol_names[0], symbol_names[1], ... + + and this on Py2: + + >>> from future.moves.module_name import symbol_names[0], ... + + or: + + >>> from future.backports.module_name import symbol_names[0], ... 
+ + except that it also handles dotted module names such as ``http.client``. + """ + + if PY3: + return __import__(module_name) + else: + if 'backport' in kwargs and bool(kwargs['backport']): + prefix = 'future.backports' + else: + prefix = 'future.moves' + parts = prefix.split('.') + module_name.split('.') + module = importlib.import_module(prefix + '.' + module_name) + output = [getattr(module, name) for name in symbol_names] + if len(output) == 1: + return output[0] + else: + return output + + +class exclude_local_folder_imports(object): + """ + A context-manager that prevents standard library modules like configparser + from being imported from the local python-future source folder on Py3. + + (This was need prior to v0.16.0 because the presence of a configparser + folder would otherwise have prevented setuptools from running on Py3. Maybe + it's not needed any more?) + """ + def __init__(self, *args): + assert len(args) > 0 + self.module_names = args + # Disallow dotted module names like http.client: + if any(['.' in m for m in self.module_names]): + raise NotImplementedError('Dotted module names are not supported') + + def __enter__(self): + self.old_sys_path = copy.copy(sys.path) + self.old_sys_modules = copy.copy(sys.modules) + if sys.version_info[0] < 3: + return + # The presence of all these indicates we've found our source folder, + # because `builtins` won't have been installed in site-packages by setup.py: + FUTURE_SOURCE_SUBFOLDERS = ['future', 'past', 'libfuturize', 'libpasteurize', 'builtins'] + + # Look for the future source folder: + for folder in self.old_sys_path: + if all([os.path.exists(os.path.join(folder, subfolder)) + for subfolder in FUTURE_SOURCE_SUBFOLDERS]): + # Found it. Remove it. 
+ sys.path.remove(folder) + + # Ensure we import the system module: + for m in self.module_names: + # Delete the module and any submodules from sys.modules: + # for key in list(sys.modules): + # if key == m or key.startswith(m + '.'): + # try: + # del sys.modules[key] + # except KeyError: + # pass + try: + module = __import__(m, level=0) + except ImportError: + # There's a problem importing the system module. E.g. the + # winreg module is not available except on Windows. + pass + + def __exit__(self, *args): + # Restore sys.path and sys.modules: + sys.path = self.old_sys_path + for m in set(self.old_sys_modules.keys()) - set(sys.modules.keys()): + sys.modules[m] = self.old_sys_modules[m] + +TOP_LEVEL_MODULES = ['builtins', + 'copyreg', + 'html', + 'http', + 'queue', + 'reprlib', + 'socketserver', + 'test', + 'tkinter', + 'winreg', + 'xmlrpc', + '_dummy_thread', + '_markupbase', + '_thread', + ] + +def import_top_level_modules(): + with exclude_local_folder_imports(*TOP_LEVEL_MODULES): + for m in TOP_LEVEL_MODULES: + try: + __import__(m) + except ImportError: # e.g. winreg + pass diff --git a/src/future/tests/__init__.py b/src/future/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/future/tests/base.py b/src/future/tests/base.py new file mode 100644 index 00000000..4ef437ba --- /dev/null +++ b/src/future/tests/base.py @@ -0,0 +1,539 @@ +from __future__ import print_function, absolute_import +import os +import tempfile +import unittest +import sys +import re +import warnings +import io +from textwrap import dedent + +from future.utils import bind_method, PY26, PY3, PY2, PY27 +from future.moves.subprocess import check_output, STDOUT, CalledProcessError + +if PY26: + import unittest2 as unittest + + +def reformat_code(code): + """ + Removes any leading \n and dedents. 
+ """ + if code.startswith('\n'): + code = code[1:] + return dedent(code) + + +def order_future_lines(code): + """ + Returns the code block with any ``__future__`` import lines sorted, and + then any ``future`` import lines sorted, then any ``builtins`` import lines + sorted. + + This only sorts the lines within the expected blocks. + + See test_order_future_lines() for an example. + """ + + # We need .splitlines(keepends=True), which doesn't exist on Py2, + # so we use this instead: + lines = code.split('\n') + + uufuture_line_numbers = [i for i, line in enumerate(lines) + if line.startswith('from __future__ import ')] + + future_line_numbers = [i for i, line in enumerate(lines) + if line.startswith('from future') + or line.startswith('from past')] + + builtins_line_numbers = [i for i, line in enumerate(lines) + if line.startswith('from builtins')] + + assert code.lstrip() == code, ('internal usage error: ' + 'dedent the code before calling order_future_lines()') + + def mymax(numbers): + return max(numbers) if len(numbers) > 0 else 0 + + def mymin(numbers): + return min(numbers) if len(numbers) > 0 else float('inf') + + assert mymax(uufuture_line_numbers) <= mymin(future_line_numbers), \ + 'the __future__ and future imports are out of order' + + # assert mymax(future_line_numbers) <= mymin(builtins_line_numbers), \ + # 'the future and builtins imports are out of order' + + uul = sorted([lines[i] for i in uufuture_line_numbers]) + sorted_uufuture_lines = dict(zip(uufuture_line_numbers, uul)) + + fl = sorted([lines[i] for i in future_line_numbers]) + sorted_future_lines = dict(zip(future_line_numbers, fl)) + + bl = sorted([lines[i] for i in builtins_line_numbers]) + sorted_builtins_lines = dict(zip(builtins_line_numbers, bl)) + + # Replace the old unsorted "from __future__ import ..." 
lines with the + # new sorted ones: + new_lines = [] + for i in range(len(lines)): + if i in uufuture_line_numbers: + new_lines.append(sorted_uufuture_lines[i]) + elif i in future_line_numbers: + new_lines.append(sorted_future_lines[i]) + elif i in builtins_line_numbers: + new_lines.append(sorted_builtins_lines[i]) + else: + new_lines.append(lines[i]) + return '\n'.join(new_lines) + + +class VerboseCalledProcessError(CalledProcessError): + """ + Like CalledProcessError, but it displays more information (message and + script output) for diagnosing test failures etc. + """ + def __init__(self, msg, returncode, cmd, output=None): + self.msg = msg + self.returncode = returncode + self.cmd = cmd + self.output = output + + def __str__(self): + return ("Command '%s' failed with exit status %d\nMessage: %s\nOutput: %s" + % (self.cmd, self.returncode, self.msg, self.output)) + +class FuturizeError(VerboseCalledProcessError): + pass + +class PasteurizeError(VerboseCalledProcessError): + pass + + +class CodeHandler(unittest.TestCase): + """ + Handy mixin for test classes for writing / reading / futurizing / + running .py files in the test suite. 
+ """ + def setUp(self): + """ + The outputs from the various futurize stages should have the + following headers: + """ + # After stage1: + # TODO: use this form after implementing a fixer to consolidate + # __future__ imports into a single line: + # self.headers1 = """ + # from __future__ import absolute_import, division, print_function + # """ + self.headers1 = reformat_code(""" + from __future__ import absolute_import + from __future__ import division + from __future__ import print_function + """) + + # After stage2 --all-imports: + # TODO: use this form after implementing a fixer to consolidate + # __future__ imports into a single line: + # self.headers2 = """ + # from __future__ import (absolute_import, division, + # print_function, unicode_literals) + # from future import standard_library + # from future.builtins import * + # """ + self.headers2 = reformat_code(""" + from __future__ import absolute_import + from __future__ import division + from __future__ import print_function + from __future__ import unicode_literals + from future import standard_library + standard_library.install_aliases() + from builtins import * + """) + self.interpreters = [sys.executable] + self.tempdir = tempfile.mkdtemp() + os.path.sep + pypath = os.getenv('PYTHONPATH') + if pypath: + self.env = {'PYTHONPATH': os.getcwd() + os.pathsep + pypath} + else: + self.env = {'PYTHONPATH': os.getcwd()} + + def convert(self, code, stages=(1, 2), all_imports=False, from3=False, + reformat=True, run=True, conservative=False): + """ + Converts the code block using ``futurize`` and returns the + resulting code. + + Passing stages=[1] or stages=[2] passes the flag ``--stage1`` or + ``stage2`` to ``futurize``. Passing both stages runs ``futurize`` + with both stages by default. + + If from3 is False, runs ``futurize``, converting from Python 2 to + both 2 and 3. If from3 is True, runs ``pasteurize`` to convert + from Python 3 to both 2 and 3. 
+ + Optionally reformats the code block first using the reformat() function. + + If run is True, runs the resulting code under all Python + interpreters in self.interpreters. + """ + if reformat: + code = reformat_code(code) + self._write_test_script(code) + self._futurize_test_script(stages=stages, all_imports=all_imports, + from3=from3, conservative=conservative) + output = self._read_test_script() + if run: + for interpreter in self.interpreters: + _ = self._run_test_script(interpreter=interpreter) + return output + + def compare(self, output, expected, ignore_imports=True): + """ + Compares whether the code blocks are equal. If not, raises an + exception so the test fails. Ignores any trailing whitespace like + blank lines. + + If ignore_imports is True, passes the code blocks into the + strip_future_imports method. + + If one code block is a unicode string and the other a + byte-string, it assumes the byte-string is encoded as utf-8. + """ + if ignore_imports: + output = self.strip_future_imports(output) + expected = self.strip_future_imports(expected) + if isinstance(output, bytes) and not isinstance(expected, bytes): + output = output.decode('utf-8') + if isinstance(expected, bytes) and not isinstance(output, bytes): + expected = expected.decode('utf-8') + self.assertEqual(order_future_lines(output.rstrip()), + expected.rstrip()) + + def strip_future_imports(self, code): + """ + Strips any of these import lines: + + from __future__ import + from future + from future. 
+ from builtins + + or any line containing: + install_hooks() + or: + install_aliases() + + Limitation: doesn't handle imports split across multiple lines like + this: + + from __future__ import (absolute_import, division, print_function, + unicode_literals) + """ + output = [] + # We need .splitlines(keepends=True), which doesn't exist on Py2, + # so we use this instead: + for line in code.split('\n'): + if not (line.startswith('from __future__ import ') + or line.startswith('from future ') + or line.startswith('from builtins ') + or 'install_hooks()' in line + or 'install_aliases()' in line + # but don't match "from future_builtins" :) + or line.startswith('from future.')): + output.append(line) + return '\n'.join(output) + + def convert_check(self, before, expected, stages=(1, 2), all_imports=False, + ignore_imports=True, from3=False, run=True, + conservative=False): + """ + Convenience method that calls convert() and compare(). + + Reformats the code blocks automatically using the reformat_code() + function. + + If all_imports is passed, we add the appropriate import headers + for the stage(s) selected to the ``expected`` code-block, so they + needn't appear repeatedly in the test code. + + If ignore_imports is True, ignores the presence of any lines + beginning: + + from __future__ import ... + from future import ... + + for the purpose of the comparison. + """ + output = self.convert(before, stages=stages, all_imports=all_imports, + from3=from3, run=run, conservative=conservative) + if all_imports: + headers = self.headers2 if 2 in stages else self.headers1 + else: + headers = '' + + reformatted = reformat_code(expected) + if headers in reformatted: + headers = '' + + self.compare(output, headers + reformatted, + ignore_imports=ignore_imports) + + def unchanged(self, code, **kwargs): + """ + Convenience method to ensure the code is unchanged by the + futurize process. 
+ """ + self.convert_check(code, code, **kwargs) + + def _write_test_script(self, code, filename='mytestscript.py'): + """ + Dedents the given code (a multiline string) and writes it out to + a file in a temporary folder like /tmp/tmpUDCn7x/mytestscript.py. + """ + if isinstance(code, bytes): + code = code.decode('utf-8') + # Be explicit about encoding the temp file as UTF-8 (issue #63): + with io.open(self.tempdir + filename, 'wt', encoding='utf-8') as f: + f.write(dedent(code)) + + def _read_test_script(self, filename='mytestscript.py'): + with io.open(self.tempdir + filename, 'rt', encoding='utf-8') as f: + newsource = f.read() + return newsource + + def _futurize_test_script(self, filename='mytestscript.py', stages=(1, 2), + all_imports=False, from3=False, + conservative=False): + params = [] + stages = list(stages) + if all_imports: + params.append('--all-imports') + if from3: + script = 'pasteurize.py' + else: + script = 'futurize.py' + if stages == [1]: + params.append('--stage1') + elif stages == [2]: + params.append('--stage2') + else: + assert stages == [1, 2] + if conservative: + params.append('--conservative') + # No extra params needed + + # Absolute file path: + fn = self.tempdir + filename + call_args = [sys.executable, script] + params + ['-w', fn] + try: + output = check_output(call_args, stderr=STDOUT, env=self.env) + except CalledProcessError as e: + with open(fn) as f: + msg = ( + 'Error running the command %s\n' + '%s\n' + 'Contents of file %s:\n' + '\n' + '%s') % ( + ' '.join(call_args), + 'env=%s' % self.env, + fn, + '----\n%s\n----' % f.read(), + ) + ErrorClass = (FuturizeError if 'futurize' in script else PasteurizeError) + + if not hasattr(e, 'output'): + # The attribute CalledProcessError.output doesn't exist on Py2.6 + e.output = None + raise ErrorClass(msg, e.returncode, e.cmd, output=e.output) + return output + + def _run_test_script(self, filename='mytestscript.py', + interpreter=sys.executable): + # Absolute file path: + fn = 
self.tempdir + filename + try: + output = check_output([interpreter, fn], + env=self.env, stderr=STDOUT) + except CalledProcessError as e: + with open(fn) as f: + msg = ( + 'Error running the command %s\n' + '%s\n' + 'Contents of file %s:\n' + '\n' + '%s') % ( + ' '.join([interpreter, fn]), + 'env=%s' % self.env, + fn, + '----\n%s\n----' % f.read(), + ) + if not hasattr(e, 'output'): + # The attribute CalledProcessError.output doesn't exist on Py2.6 + e.output = None + raise VerboseCalledProcessError(msg, e.returncode, e.cmd, output=e.output) + return output + + +# Decorator to skip some tests on Python 2.6 ... +skip26 = unittest.skipIf(PY26, "this test is known to fail on Py2.6") + + +def expectedFailurePY3(func): + if not PY3: + return func + return unittest.expectedFailure(func) + +def expectedFailurePY26(func): + if not PY26: + return func + return unittest.expectedFailure(func) + + +def expectedFailurePY27(func): + if not PY27: + return func + return unittest.expectedFailure(func) + + +def expectedFailurePY2(func): + if not PY2: + return func + return unittest.expectedFailure(func) + + +# Renamed in Py3.3: +if not hasattr(unittest.TestCase, 'assertRaisesRegex'): + unittest.TestCase.assertRaisesRegex = unittest.TestCase.assertRaisesRegexp + +# From Py3.3: +def assertRegex(self, text, expected_regex, msg=None): + """Fail the test unless the text matches the regular expression.""" + if isinstance(expected_regex, (str, unicode)): + assert expected_regex, "expected_regex must not be empty." 
+ expected_regex = re.compile(expected_regex) + if not expected_regex.search(text): + msg = msg or "Regex didn't match" + msg = '%s: %r not found in %r' % (msg, expected_regex.pattern, text) + raise self.failureException(msg) + +if not hasattr(unittest.TestCase, 'assertRegex'): + bind_method(unittest.TestCase, 'assertRegex', assertRegex) + +class _AssertRaisesBaseContext(object): + + def __init__(self, expected, test_case, callable_obj=None, + expected_regex=None): + self.expected = expected + self.test_case = test_case + if callable_obj is not None: + try: + self.obj_name = callable_obj.__name__ + except AttributeError: + self.obj_name = str(callable_obj) + else: + self.obj_name = None + if isinstance(expected_regex, (bytes, str)): + expected_regex = re.compile(expected_regex) + self.expected_regex = expected_regex + self.msg = None + + def _raiseFailure(self, standardMsg): + msg = self.test_case._formatMessage(self.msg, standardMsg) + raise self.test_case.failureException(msg) + + def handle(self, name, callable_obj, args, kwargs): + """ + If callable_obj is None, assertRaises/Warns is being used as a + context manager, so check for a 'msg' kwarg and return self. + If callable_obj is not None, call it passing args and kwargs. + """ + if callable_obj is None: + self.msg = kwargs.pop('msg', None) + return self + with self: + callable_obj(*args, **kwargs) + +class _AssertWarnsContext(_AssertRaisesBaseContext): + """A context manager used to implement TestCase.assertWarns* methods.""" + + def __enter__(self): + # The __warningregistry__'s need to be in a pristine state for tests + # to work properly. 
+ for v in sys.modules.values(): + if getattr(v, '__warningregistry__', None): + v.__warningregistry__ = {} + self.warnings_manager = warnings.catch_warnings(record=True) + self.warnings = self.warnings_manager.__enter__() + warnings.simplefilter("always", self.expected) + return self + + def __exit__(self, exc_type, exc_value, tb): + self.warnings_manager.__exit__(exc_type, exc_value, tb) + if exc_type is not None: + # let unexpected exceptions pass through + return + try: + exc_name = self.expected.__name__ + except AttributeError: + exc_name = str(self.expected) + first_matching = None + for m in self.warnings: + w = m.message + if not isinstance(w, self.expected): + continue + if first_matching is None: + first_matching = w + if (self.expected_regex is not None and + not self.expected_regex.search(str(w))): + continue + # store warning for later retrieval + self.warning = w + self.filename = m.filename + self.lineno = m.lineno + return + # Now we simply try to choose a helpful failure message + if first_matching is not None: + self._raiseFailure('"{}" does not match "{}"'.format( + self.expected_regex.pattern, str(first_matching))) + if self.obj_name: + self._raiseFailure("{} not triggered by {}".format(exc_name, + self.obj_name)) + else: + self._raiseFailure("{} not triggered".format(exc_name)) + + +def assertWarns(self, expected_warning, callable_obj=None, *args, **kwargs): + """Fail unless a warning of class warnClass is triggered + by callable_obj when invoked with arguments args and keyword + arguments kwargs. If a different type of warning is + triggered, it will not be handled: depending on the other + warning filtering rules in effect, it might be silenced, printed + out, or raised as an exception. + + If called with callable_obj omitted or None, will return a + context object used like this:: + + with self.assertWarns(SomeWarning): + do_something() + + An optional keyword argument 'msg' can be provided when assertWarns + is used as a context object. 
+ + The context manager keeps a reference to the first matching + warning as the 'warning' attribute; similarly, the 'filename' + and 'lineno' attributes give you information about the line + of Python code from which the warning was triggered. + This allows you to inspect the warning after the assertion:: + + with self.assertWarns(SomeWarning) as cm: + do_something() + the_warning = cm.warning + self.assertEqual(the_warning.some_attribute, 147) + """ + context = _AssertWarnsContext(expected_warning, self, callable_obj) + return context.handle('assertWarns', callable_obj, args, kwargs) + +if not hasattr(unittest.TestCase, 'assertWarns'): + bind_method(unittest.TestCase, 'assertWarns', assertWarns) diff --git a/future/builtins/backports/__init__.py b/src/future/types/__init__.py similarity index 79% rename from future/builtins/backports/__init__.py rename to src/future/types/__init__.py index 4198eb3c..06250770 100644 --- a/future/builtins/backports/__init__.py +++ b/src/future/types/__init__.py @@ -1,23 +1,21 @@ """ -This module contains backports of new or changed functionality from -Python 3 to Python 2: +This module contains backports the data types that were significantly changed +in the transition from Python 2 to Python 3. - an implementation of Python 3's bytes object (pure Python subclass of Python 2's builtin 8-bit str type) - an implementation of Python 3's str object (pure Python subclass of Python 2's builtin unicode type) - a backport of the range iterator from Py3 with slicing support -- the magic zero-argument super() function -- the new round() behaviour It is used as follows:: from __future__ import division, absolute_import, print_function - from future.builtins.backports import str, bytes, range, super, round + from builtins import bytes, dict, int, range, str to bring in the new semantics for these functions from Python 3. 
And then, for example:: - + b = bytes(b'ABCD') assert list(b) == [65, 66, 67, 68] assert repr(b) == "b'ABCD'" @@ -48,12 +46,22 @@ pass and:: - + class VerboseList(list): def append(self, item): print('Adding an item') super().append(item) # new simpler super() function +For more information: +--------------------- + +- future.types.newbytes +- future.types.newdict +- future.types.newint +- future.types.newobject +- future.types.newrange +- future.types.newstr + Notes ===== @@ -74,14 +82,9 @@ def append(self, item): round() ------- Python 3 modifies the behaviour of ``round()`` to use "Banker's Rounding". -See http://stackoverflow.com/a/10825998_. See the ``newround`` module +See http://stackoverflow.com/a/10825998. See the ``newround`` module docstring for more details. - -TODO: ------ -- Check int() ?? - """ from __future__ import absolute_import, division, print_function @@ -109,7 +112,7 @@ def f(a, b): raises a TypeError when f is called if a unicode object is passed as `a` or a bytes object is passed as `b`. - This also skips over keyword arguments, so + This also skips over keyword arguments, so @disallow_types([0, 1], [unicode, bytes]) def g(a, b=None): @@ -127,7 +130,7 @@ def g(a, b=None): ... def __add__(self, other): ... pass - >>> newbytes('1234') + u'1234' #doctest: +IGNORE_EXCEPTION_DETAIL + >>> newbytes('1234') + u'1234' #doctest: +IGNORE_EXCEPTION_DETAIL Traceback (most recent call last): ... 
TypeError: can't concat 'bytes' to (unicode) str @@ -215,17 +218,40 @@ def issubset(list1, list2): bytes = builtins.bytes dict = builtins.dict int = builtins.int + list = builtins.list + object = builtins.object range = builtins.range - round = builtins.round str = builtins.str - super = builtins.super - __all__ = [] + + # The identity mapping + newtypes = {bytes: bytes, + dict: dict, + int: int, + list: list, + object: object, + range: range, + str: str} + + __all__ = ['newtypes'] + else: - from .newbytes import newbytes as bytes - from .newdict import newdict as dict - from .newint import newint as int - from .newrange import newrange as range - from .newround import newround as round - from .newstr import newstr as str - from .newsuper import newsuper as super - __all__ = ['bytes', 'dict', 'int', 'range', 'round', 'str', 'super'] + + from .newbytes import newbytes + from .newdict import newdict + from .newint import newint + from .newlist import newlist + from .newrange import newrange + from .newobject import newobject + from .newstr import newstr + + newtypes = {bytes: newbytes, + dict: newdict, + int: newint, + long: newint, + list: newlist, + object: newobject, + range: newrange, + str: newbytes, + unicode: newstr} + + __all__ = ['newbytes', 'newdict', 'newint', 'newlist', 'newrange', 'newstr', 'newtypes'] diff --git a/src/future/types/newbytes.py b/src/future/types/newbytes.py new file mode 100644 index 00000000..c9d584a7 --- /dev/null +++ b/src/future/types/newbytes.py @@ -0,0 +1,460 @@ +""" +Pure-Python implementation of a Python 3-like bytes object for Python 2. + +Why do this? Without it, the Python 2 bytes object is a very, very +different beast to the Python 3 bytes object. 
+""" + +from numbers import Integral +import string +import copy + +from future.utils import istext, isbytes, PY2, PY3, with_metaclass +from future.types import no, issubset +from future.types.newobject import newobject + +if PY2: + from collections import Iterable +else: + from collections.abc import Iterable + + +_builtin_bytes = bytes + +if PY3: + # We'll probably never use newstr on Py3 anyway... + unicode = str + + +class BaseNewBytes(type): + def __instancecheck__(cls, instance): + if cls == newbytes: + return isinstance(instance, _builtin_bytes) + else: + return issubclass(instance.__class__, cls) + + +def _newchr(x): + if isinstance(x, str): # this happens on pypy + return x.encode('ascii') + else: + return chr(x) + + +class newbytes(with_metaclass(BaseNewBytes, _builtin_bytes)): + """ + A backport of the Python 3 bytes object to Py2 + """ + def __new__(cls, *args, **kwargs): + """ + From the Py3 bytes docstring: + + bytes(iterable_of_ints) -> bytes + bytes(string, encoding[, errors]) -> bytes + bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer + bytes(int) -> bytes object of size given by the parameter initialized with null bytes + bytes() -> empty bytes object + + Construct an immutable array of bytes from: + - an iterable yielding integers in range(256) + - a text string encoded using the specified encoding + - any object implementing the buffer API. + - an integer + """ + + encoding = None + errors = None + + if len(args) == 0: + return super(newbytes, cls).__new__(cls) + elif len(args) >= 2: + args = list(args) + if len(args) == 3: + errors = args.pop() + encoding=args.pop() + # Was: elif isinstance(args[0], newbytes): + # We use type() instead of the above because we're redefining + # this to be True for all unicode string subclasses. Warning: + # This may render newstr un-subclassable. 
+ if type(args[0]) == newbytes: + # Special-case: for consistency with Py3.3, we return the same object + # (with the same id) if a newbytes object is passed into the + # newbytes constructor. + return args[0] + elif isinstance(args[0], _builtin_bytes): + value = args[0] + elif isinstance(args[0], unicode): + try: + if 'encoding' in kwargs: + assert encoding is None + encoding = kwargs['encoding'] + if 'errors' in kwargs: + assert errors is None + errors = kwargs['errors'] + except AssertionError: + raise TypeError('Argument given by name and position') + if encoding is None: + raise TypeError('unicode string argument without an encoding') + ### + # Was: value = args[0].encode(**kwargs) + # Python 2.6 string encode() method doesn't take kwargs: + # Use this instead: + newargs = [encoding] + if errors is not None: + newargs.append(errors) + value = args[0].encode(*newargs) + ### + elif hasattr(args[0], '__bytes__'): + value = args[0].__bytes__() + elif isinstance(args[0], Iterable): + if len(args[0]) == 0: + # This could be an empty list or tuple. Return b'' as on Py3. + value = b'' + else: + # Was: elif len(args[0])>0 and isinstance(args[0][0], Integral): + # # It's a list of integers + # But then we can't index into e.g. frozensets. Try to proceed + # anyway. + try: + value = bytearray([_newchr(x) for x in args[0]]) + except: + raise ValueError('bytes must be in range(0, 256)') + elif isinstance(args[0], Integral): + if args[0] < 0: + raise ValueError('negative count') + value = b'\x00' * args[0] + else: + value = args[0] + if type(value) == newbytes: + # Above we use type(...) rather than isinstance(...) because the + # newbytes metaclass overrides __instancecheck__. + # oldbytes(value) gives the wrong thing on Py2: the same + # result as str(value) on Py3, e.g. "b'abc'". (Issue #193). 
+ # So we handle this case separately: + return copy.copy(value) + else: + return super(newbytes, cls).__new__(cls, value) + + def __repr__(self): + return 'b' + super(newbytes, self).__repr__() + + def __str__(self): + return 'b' + "'{0}'".format(super(newbytes, self).__str__()) + + def __getitem__(self, y): + value = super(newbytes, self).__getitem__(y) + if isinstance(y, Integral): + return ord(value) + else: + return newbytes(value) + + def __getslice__(self, *args): + return self.__getitem__(slice(*args)) + + def __contains__(self, key): + if isinstance(key, int): + newbyteskey = newbytes([key]) + # Don't use isinstance() here because we only want to catch + # newbytes, not Python 2 str: + elif type(key) == newbytes: + newbyteskey = key + else: + newbyteskey = newbytes(key) + return issubset(list(newbyteskey), list(self)) + + @no(unicode) + def __add__(self, other): + return newbytes(super(newbytes, self).__add__(other)) + + @no(unicode) + def __radd__(self, left): + return newbytes(left) + self + + @no(unicode) + def __mul__(self, other): + return newbytes(super(newbytes, self).__mul__(other)) + + @no(unicode) + def __rmul__(self, other): + return newbytes(super(newbytes, self).__rmul__(other)) + + def __mod__(self, vals): + if isinstance(vals, newbytes): + vals = _builtin_bytes.__str__(vals) + + elif isinstance(vals, tuple): + newvals = [] + for v in vals: + if isinstance(v, newbytes): + v = _builtin_bytes.__str__(v) + newvals.append(v) + vals = tuple(newvals) + + elif (hasattr(vals.__class__, '__getitem__') and + hasattr(vals.__class__, 'iteritems')): + for k, v in vals.iteritems(): + if isinstance(v, newbytes): + vals[k] = _builtin_bytes.__str__(v) + + return _builtin_bytes.__mod__(self, vals) + + def __imod__(self, other): + return self.__mod__(other) + + def join(self, iterable_of_bytes): + errmsg = 'sequence item {0}: expected bytes, {1} found' + if isbytes(iterable_of_bytes) or istext(iterable_of_bytes): + raise TypeError(errmsg.format(0, 
type(iterable_of_bytes))) + for i, item in enumerate(iterable_of_bytes): + if istext(item): + raise TypeError(errmsg.format(i, type(item))) + return newbytes(super(newbytes, self).join(iterable_of_bytes)) + + @classmethod + def fromhex(cls, string): + # Only on Py2: + return cls(string.replace(' ', '').decode('hex')) + + @no(unicode) + def find(self, sub, *args): + return super(newbytes, self).find(sub, *args) + + @no(unicode) + def rfind(self, sub, *args): + return super(newbytes, self).rfind(sub, *args) + + @no(unicode, (1, 2)) + def replace(self, old, new, *args): + return newbytes(super(newbytes, self).replace(old, new, *args)) + + def encode(self, *args): + raise AttributeError("encode method has been disabled in newbytes") + + def decode(self, encoding='utf-8', errors='strict'): + """ + Returns a newstr (i.e. unicode subclass) + + Decode B using the codec registered for encoding. Default encoding + is 'utf-8'. errors may be given to set a different error + handling scheme. Default is 'strict' meaning that encoding errors raise + a UnicodeDecodeError. Other possible values are 'ignore' and 'replace' + as well as any other name registered with codecs.register_error that is + able to handle UnicodeDecodeErrors. + """ + # Py2 str.encode() takes encoding and errors as optional parameter, + # not keyword arguments as in Python 3 str. + + from future.types.newstr import newstr + + if errors == 'surrogateescape': + from future.utils.surrogateescape import register_surrogateescape + register_surrogateescape() + + return newstr(super(newbytes, self).decode(encoding, errors)) + + # This is currently broken: + # # We implement surrogateescape error handling here in addition rather + # # than relying on the custom error handler from + # # future.utils.surrogateescape to be registered globally, even though + # # that is fine in the case of decoding. (But not encoding: see the + # # comments in newstr.encode()``.) 
+ # + # if errors == 'surrogateescape': + # # Decode char by char + # mybytes = [] + # for code in self: + # # Code is an int + # if 0x80 <= code <= 0xFF: + # b = 0xDC00 + code + # elif code <= 0x7F: + # b = _unichr(c).decode(encoding=encoding) + # else: + # # # It may be a bad byte + # # FIXME: What to do in this case? See the Py3 docs / tests. + # # # Try swallowing it. + # # continue + # # print("RAISE!") + # raise NotASurrogateError + # mybytes.append(b) + # return newbytes(mybytes) + # return newbytes(super(newstr, self).decode(encoding, errors)) + + @no(unicode) + def startswith(self, prefix, *args): + return super(newbytes, self).startswith(prefix, *args) + + @no(unicode) + def endswith(self, prefix, *args): + return super(newbytes, self).endswith(prefix, *args) + + @no(unicode) + def split(self, sep=None, maxsplit=-1): + # Py2 str.split() takes maxsplit as an optional parameter, not as a + # keyword argument as in Python 3 bytes. + parts = super(newbytes, self).split(sep, maxsplit) + return [newbytes(part) for part in parts] + + def splitlines(self, keepends=False): + """ + B.splitlines([keepends]) -> list of lines + + Return a list of the lines in B, breaking at line boundaries. + Line breaks are not included in the resulting list unless keepends + is given and true. + """ + # Py2 str.splitlines() takes keepends as an optional parameter, + # not as a keyword argument as in Python 3 bytes. + parts = super(newbytes, self).splitlines(keepends) + return [newbytes(part) for part in parts] + + @no(unicode) + def rsplit(self, sep=None, maxsplit=-1): + # Py2 str.rsplit() takes maxsplit as an optional parameter, not as a + # keyword argument as in Python 3 bytes. 
@no(unicode, (1,))
def rindex(self, sub, *args):
    '''
    S.rindex(sub [,start [,end]]) -> int

    Like S.rfind() but raise ValueError when the substring is not found.
    '''
    pos = self.rfind(sub, *args)
    if pos == -1:
        raise ValueError('substring not found')
    # BUGFIX: the original fell off the end here, so a *successful*
    # lookup returned None instead of the index.
    return pos
self).__le__(other) + raise TypeError(self.unorderable_err.format(type(other))) + + def __gt__(self, other): + if isinstance(other, _builtin_bytes): + return super(newbytes, self).__gt__(other) + raise TypeError(self.unorderable_err.format(type(other))) + + def __ge__(self, other): + if isinstance(other, _builtin_bytes): + return super(newbytes, self).__ge__(other) + raise TypeError(self.unorderable_err.format(type(other))) + + def __native__(self): + # We can't just feed a newbytes object into str(), because + # newbytes.__str__() returns e.g. "b'blah'", consistent with Py3 bytes. + return super(newbytes, self).__str__() + + def __getattribute__(self, name): + """ + A trick to cause the ``hasattr`` builtin-fn to return False for + the 'encode' method on Py2. + """ + if name in ['encode', u'encode']: + raise AttributeError("encode method has been disabled in newbytes") + return super(newbytes, self).__getattribute__(name) + + @no(unicode) + def rstrip(self, bytes_to_strip=None): + """ + Strip trailing bytes contained in the argument. + If the argument is omitted, strip trailing ASCII whitespace. + """ + return newbytes(super(newbytes, self).rstrip(bytes_to_strip)) + + @no(unicode) + def strip(self, bytes_to_strip=None): + """ + Strip leading and trailing bytes contained in the argument. + If the argument is omitted, strip trailing ASCII whitespace. + """ + return newbytes(super(newbytes, self).strip(bytes_to_strip)) + + def lower(self): + """ + b.lower() -> copy of b + + Return a copy of b with all ASCII characters converted to lowercase. + """ + return newbytes(super(newbytes, self).lower()) + + @no(unicode) + def upper(self): + """ + b.upper() -> copy of b + + Return a copy of b with all ASCII characters converted to uppercase. 
+ """ + return newbytes(super(newbytes, self).upper()) + + @classmethod + @no(unicode) + def maketrans(cls, frm, to): + """ + B.maketrans(frm, to) -> translation table + + Return a translation table (a bytes object of length 256) suitable + for use in the bytes or bytearray translate method where each byte + in frm is mapped to the byte at the same position in to. + The bytes objects frm and to must be of the same length. + """ + return newbytes(string.maketrans(frm, to)) + + +__all__ = ['newbytes'] diff --git a/src/future/types/newdict.py b/src/future/types/newdict.py new file mode 100644 index 00000000..d90316cb --- /dev/null +++ b/src/future/types/newdict.py @@ -0,0 +1,76 @@ +""" +A dict subclass for Python 2 that behaves like Python 3's dict + +Example use: + +>>> from builtins import dict +>>> d1 = dict() # instead of {} for an empty dict +>>> d2 = dict(key1='value1', key2='value2') + +The keys, values and items methods now return iterators on Python 2.x +(with set-like behaviour on Python 2.7). + +>>> for d in (d1, d2): +... assert not isinstance(d.keys(), list) +... assert not isinstance(d.values(), list) +... 
"""
A dict subclass for Python 2 that behaves like Python 3's dict.

Under Python 2, ``keys()``, ``values()`` and ``items()`` return views
(on 2.7) or iterators (on 2.6) instead of lists, mirroring the Python 3
interface.  Use it via::

    >>> from builtins import dict
    >>> d1 = dict()                   # instead of {} for an empty dict
    >>> d2 = dict(key1='value1', key2='value2')
"""

import sys

from future.utils import with_metaclass
from future.types.newobject import newobject


_builtin_dict = dict
ver = sys.version_info


class BaseNewDict(type):
    # Make ``isinstance(d, newdict)`` succeed for any builtin dict while
    # keeping normal subclass semantics for subclasses of newdict.
    def __instancecheck__(cls, instance):
        if cls == newdict:
            return isinstance(instance, _builtin_dict)
        return issubclass(instance.__class__, cls)


class newdict(with_metaclass(BaseNewDict, _builtin_dict)):
    """
    A backport of the Python 3 dict object to Py2
    """

    # Pick the iterator/view flavours of the mapping methods that best
    # approximate Python 3 semantics on the running interpreter.
    if ver >= (3,):
        # Python 3's dict already returns views; inherit everything.
        pass
    elif ver >= (2, 7):
        # Python 2.7 has true set-like dict views.
        items = dict.viewitems
        keys = dict.viewkeys
        values = dict.viewvalues
    else:
        # Python 2.6 only offers plain iterators.
        items = dict.iteritems
        keys = dict.iterkeys
        values = dict.itervalues

    def __new__(cls, *args, **kwargs):
        """
        dict() -> new empty dictionary
        dict(mapping) -> new dictionary initialized from a mapping object's
            (key, value) pairs
        dict(iterable) -> new dictionary initialized as if via:
            d = {}
            for k, v in iterable:
                d[k] = v
        dict(**kwargs) -> new dictionary initialized with the name=value pairs
            in the keyword argument list.  For example:  dict(one=1, two=2)
        """
        # Keyword pairs are applied by dict.__init__, not __new__, so they
        # are deliberately not forwarded here (matching the original).
        return super(newdict, cls).__new__(cls, *args)

    def __native__(self):
        """
        Hook for the future.utils.native() function
        """
        return dict(self)


__all__ = ['newdict']
The most notable difference is: + +- representation: trailing L in Python 2 removed in Python 3 +""" +from __future__ import division + +import struct + +from future.types.newbytes import newbytes +from future.types.newobject import newobject +from future.utils import PY3, isint, istext, isbytes, with_metaclass, native + + +if PY3: + long = int + from collections.abc import Iterable +else: + from collections import Iterable + + +class BaseNewInt(type): + def __instancecheck__(cls, instance): + if cls == newint: + # Special case for Py2 short or long int + return isinstance(instance, (int, long)) + else: + return issubclass(instance.__class__, cls) + + +class newint(with_metaclass(BaseNewInt, long)): + """ + A backport of the Python 3 int object to Py2 + """ + def __new__(cls, x=0, base=10): + """ + From the Py3 int docstring: + + | int(x=0) -> integer + | int(x, base=10) -> integer + | + | Convert a number or string to an integer, or return 0 if no + | arguments are given. If x is a number, return x.__int__(). For + | floating point numbers, this truncates towards zero. + | + | If x is not a number or if base is given, then x must be a string, + | bytes, or bytearray instance representing an integer literal in the + | given base. The literal can be preceded by '+' or '-' and be + | surrounded by whitespace. The base defaults to 10. Valid bases are + | 0 and 2-36. Base 0 means to interpret the base from the string as an + | integer literal. 
def __repr__(self):
    """
    Return the repr of the value without the trailing 'L' that Py2's
    long type appends, matching Python 3's int repr.
    """
    value = super(newint, self).__repr__()
    # BUGFIX: the original used `assert value[-1] == 'L'` followed by an
    # unconditional `value[:-1]`.  Under `python -O` the assert is
    # stripped, so the slice would silently chop the last digit off.
    # Strip the suffix explicitly instead.
    if value.endswith('L'):
        value = value[:-1]
    return value

def __add__(self, other):
    """
    self + other.  Results Py2's long can compute are re-wrapped in
    newint; otherwise fall back to plain long arithmetic so the other
    operand's reflected method can take over.
    """
    value = super(newint, self).__add__(other)
    if value is NotImplemented:
        return long(self) + other
    return newint(value)

def __radd__(self, other):
    """other + self (reflected addition); see __add__ for the fallback."""
    value = super(newint, self).__radd__(other)
    if value is NotImplemented:
        return other + long(self)
    return newint(value)

def __sub__(self, other):
    """self - other; see __add__ for the NotImplemented fallback."""
    value = super(newint, self).__sub__(other)
    if value is NotImplemented:
        return long(self) - other
    return newint(value)

def __rsub__(self, other):
    """other - self (reflected subtraction); see __add__ for the fallback."""
    value = super(newint, self).__rsub__(other)
    if value is NotImplemented:
        return other - long(self)
    return newint(value)
return value + + def __rmul__(self, other): + value = super(newint, self).__rmul__(other) + if isint(value): + return newint(value) + elif value is NotImplemented: + return other * long(self) + return value + + def __div__(self, other): + # We override this rather than e.g. relying on object.__div__ or + # long.__div__ because we want to wrap the value in a newint() + # call if other is another int + value = long(self) / other + if isinstance(other, (int, long)): + return newint(value) + else: + return value + + def __rdiv__(self, other): + value = other / long(self) + if isinstance(other, (int, long)): + return newint(value) + else: + return value + + def __idiv__(self, other): + # long has no __idiv__ method. Use __itruediv__ and cast back to + # newint: + value = self.__itruediv__(other) + if isinstance(other, (int, long)): + return newint(value) + else: + return value + + def __truediv__(self, other): + value = super(newint, self).__truediv__(other) + if value is NotImplemented: + value = long(self) / other + return value + + def __rtruediv__(self, other): + return super(newint, self).__rtruediv__(other) + + def __itruediv__(self, other): + # long has no __itruediv__ method + mylong = long(self) + mylong /= other + return mylong + + def __floordiv__(self, other): + return newint(super(newint, self).__floordiv__(other)) + + def __rfloordiv__(self, other): + return newint(super(newint, self).__rfloordiv__(other)) + + def __ifloordiv__(self, other): + # long has no __ifloordiv__ method + mylong = long(self) + mylong //= other + return newint(mylong) + + def __mod__(self, other): + value = super(newint, self).__mod__(other) + if value is NotImplemented: + return long(self) % other + return newint(value) + + def __rmod__(self, other): + value = super(newint, self).__rmod__(other) + if value is NotImplemented: + return other % long(self) + return newint(value) + + def __divmod__(self, other): + value = super(newint, self).__divmod__(other) + if value is 
NotImplemented: + mylong = long(self) + return (mylong // other, mylong % other) + return (newint(value[0]), newint(value[1])) + + def __rdivmod__(self, other): + value = super(newint, self).__rdivmod__(other) + if value is NotImplemented: + mylong = long(self) + return (other // mylong, other % mylong) + return (newint(value[0]), newint(value[1])) + + def __pow__(self, other): + value = super(newint, self).__pow__(other) + if value is NotImplemented: + return long(self) ** other + return newint(value) + + def __rpow__(self, other): + value = super(newint, self).__rpow__(other) + if isint(value): + return newint(value) + elif value is NotImplemented: + return other ** long(self) + return value + + def __lshift__(self, other): + if not isint(other): + raise TypeError( + "unsupported operand type(s) for <<: '%s' and '%s'" % + (type(self).__name__, type(other).__name__)) + return newint(super(newint, self).__lshift__(other)) + + def __rshift__(self, other): + if not isint(other): + raise TypeError( + "unsupported operand type(s) for >>: '%s' and '%s'" % + (type(self).__name__, type(other).__name__)) + return newint(super(newint, self).__rshift__(other)) + + def __and__(self, other): + if not isint(other): + raise TypeError( + "unsupported operand type(s) for &: '%s' and '%s'" % + (type(self).__name__, type(other).__name__)) + return newint(super(newint, self).__and__(other)) + + def __or__(self, other): + if not isint(other): + raise TypeError( + "unsupported operand type(s) for |: '%s' and '%s'" % + (type(self).__name__, type(other).__name__)) + return newint(super(newint, self).__or__(other)) + + def __xor__(self, other): + if not isint(other): + raise TypeError( + "unsupported operand type(s) for ^: '%s' and '%s'" % + (type(self).__name__, type(other).__name__)) + return newint(super(newint, self).__xor__(other)) + + def __neg__(self): + return newint(super(newint, self).__neg__()) + + def __pos__(self): + return newint(super(newint, self).__pos__()) + + def 
__abs__(self): + return newint(super(newint, self).__abs__()) + + def __invert__(self): + return newint(super(newint, self).__invert__()) + + def __int__(self): + return self + + def __nonzero__(self): + return self.__bool__() + + def __bool__(self): + """ + So subclasses can override this, Py3-style + """ + if PY3: + return super(newint, self).__bool__() + + return super(newint, self).__nonzero__() + + def __native__(self): + return long(self) + + def to_bytes(self, length, byteorder='big', signed=False): + """ + Return an array of bytes representing an integer. + + The integer is represented using length bytes. An OverflowError is + raised if the integer is not representable with the given number of + bytes. + + The byteorder argument determines the byte order used to represent the + integer. If byteorder is 'big', the most significant byte is at the + beginning of the byte array. If byteorder is 'little', the most + significant byte is at the end of the byte array. To request the native + byte order of the host system, use `sys.byteorder' as the byte order value. + + The signed keyword-only argument determines whether two's complement is + used to represent the integer. If signed is False and a negative integer + is given, an OverflowError is raised. 
+ """ + if length < 0: + raise ValueError("length argument must be non-negative") + if length == 0 and self == 0: + return newbytes() + if signed and self < 0: + bits = length * 8 + num = (2**bits) + self + if num <= 0: + raise OverflowError("int too small to convert") + else: + if self < 0: + raise OverflowError("can't convert negative int to unsigned") + num = self + if byteorder not in ('little', 'big'): + raise ValueError("byteorder must be either 'little' or 'big'") + h = b'%x' % num + s = newbytes((b'0'*(len(h) % 2) + h).zfill(length*2).decode('hex')) + if signed: + high_set = s[0] & 0x80 + if self > 0 and high_set: + raise OverflowError("int too big to convert") + if self < 0 and not high_set: + raise OverflowError("int too small to convert") + if len(s) > length: + raise OverflowError("int too big to convert") + return s if byteorder == 'big' else s[::-1] + + @classmethod + def from_bytes(cls, mybytes, byteorder='big', signed=False): + """ + Return the integer represented by the given array of bytes. + + The mybytes argument must either support the buffer protocol or be an + iterable object producing bytes. Bytes and bytearray are examples of + built-in objects that support the buffer protocol. + + The byteorder argument determines the byte order used to represent the + integer. If byteorder is 'big', the most significant byte is at the + beginning of the byte array. If byteorder is 'little', the most + significant byte is at the end of the byte array. To request the native + byte order of the host system, use `sys.byteorder' as the byte order value. + + The signed keyword-only argument indicates whether two's complement is + used to represent the integer. + """ + if byteorder not in ('little', 'big'): + raise ValueError("byteorder must be either 'little' or 'big'") + if isinstance(mybytes, unicode): + raise TypeError("cannot convert unicode objects to bytes") + # mybytes can also be passed as a sequence of integers on Py3. 
+ # Test for this: + elif isinstance(mybytes, Iterable): + mybytes = newbytes(mybytes) + b = mybytes if byteorder == 'big' else mybytes[::-1] + if len(b) == 0: + b = b'\x00' + # The encode() method has been disabled by newbytes, but Py2's + # str has it: + num = int(native(b).encode('hex'), 16) + if signed and (b[0] & 0x80): + num = num - (2 ** (len(b)*8)) + return cls(num) + + +# def _twos_comp(val, bits): +# """compute the 2's compliment of int value val""" +# if( (val&(1<<(bits-1))) != 0 ): +# val = val - (1<>> from builtins import list +>>> l1 = list() # instead of {} for an empty list +>>> l1.append('hello') +>>> l2 = l1.copy() + +""" + +import sys +import copy + +from future.utils import with_metaclass +from future.types.newobject import newobject + + +_builtin_list = list +ver = sys.version_info[:2] + + +class BaseNewList(type): + def __instancecheck__(cls, instance): + if cls == newlist: + return isinstance(instance, _builtin_list) + else: + return issubclass(instance.__class__, cls) + + +class newlist(with_metaclass(BaseNewList, _builtin_list)): + """ + A backport of the Python 3 list object to Py2 + """ + def copy(self): + """ + L.copy() -> list -- a shallow copy of L + """ + return copy.copy(self) + + def clear(self): + """L.clear() -> None -- remove all items from L""" + for i in range(len(self)): + self.pop() + + def __new__(cls, *args, **kwargs): + """ + list() -> new empty list + list(iterable) -> new list initialized from iterable's items + """ + + if len(args) == 0: + return super(newlist, cls).__new__(cls) + elif type(args[0]) == newlist: + value = args[0] + else: + value = args[0] + return super(newlist, cls).__new__(cls, value) + + def __add__(self, value): + return newlist(super(newlist, self).__add__(value)) + + def __radd__(self, left): + " left + self " + try: + return newlist(left) + self + except: + return NotImplemented + + def __getitem__(self, y): + """ + x.__getitem__(y) <==> x[y] + + Warning: a bug in Python 2.x prevents indexing via 
a slice from
+        returning a newlist object.
+        """
+        if isinstance(y, slice):
+            return newlist(super(newlist, self).__getitem__(y))
+        else:
+            return super(newlist, self).__getitem__(y)
+
+    def __native__(self):
+        """
+        Hook for the future.utils.native() function
+        """
+        return list(self)
+
+    def __nonzero__(self):
+        return len(self) > 0
+
+
+__all__ = ['newlist']
diff --git a/src/future/types/newmemoryview.py b/src/future/types/newmemoryview.py
new file mode 100644
index 00000000..09f804dc
--- /dev/null
+++ b/src/future/types/newmemoryview.py
@@ -0,0 +1,29 @@
+"""
+A pretty lame implementation of a memoryview object for Python 2.6.
+"""
+from numbers import Integral
+import string
+
+from future.utils import istext, isbytes, PY2, with_metaclass
+from future.types import no, issubset
+
+if PY2:
+    from collections import Iterable
+else:
+    from collections.abc import Iterable
+
+# class BaseNewBytes(type):
+#     def __instancecheck__(cls, instance):
+#         return isinstance(instance, _builtin_bytes)
+
+
+class newmemoryview(object):    # with_metaclass(BaseNewBytes, _builtin_bytes)):
+    """
+    A pretty lame backport of the Python 2.7 and Python 3.x
+    memoryview object to Py2.6.
+    """
+    def __init__(self, obj):
+        return obj
+
+
+__all__ = ['newmemoryview']
diff --git a/src/future/types/newobject.py b/src/future/types/newobject.py
new file mode 100644
index 00000000..31b84fc1
--- /dev/null
+++ b/src/future/types/newobject.py
@@ -0,0 +1,117 @@
+"""
+An object subclass for Python 2 that gives new-style classes written in the
+style of Python 3 (with ``__next__`` and unicode-returning ``__str__`` methods)
+the appropriate Python 2-style ``next`` and ``__unicode__`` methods for compatibility.
+
+Example use::
+
+    from builtins import object
+
+    my_unicode_str = u'Unicode string: \u5b54\u5b50'
+
+    class A(object):
+        def __str__(self):
+            return my_unicode_str
+
+    a = A()
+    print(str(a))
+
+    # On Python 2, these relations hold:
+    assert unicode(a) == my_unicode_str
+    assert str(a) == my_unicode_str.encode('utf-8')
+
+
+Another example::
+
+    from builtins import object
+
+    class Upper(object):
+        def __init__(self, iterable):
+            self._iter = iter(iterable)
+        def __next__(self):                 # note the Py3 interface
+            return next(self._iter).upper()
+        def __iter__(self):
+            return self
+
+    assert list(Upper('hello')) == list('HELLO')
+
+"""
+
+
+class newobject(object):
+    """
+    A magical object class that provides Python 2 compatibility methods::
+        next
+        __unicode__
+        __nonzero__
+
+    Subclasses of this class can merely define the Python 3 methods (__next__,
+    __str__, and __bool__).
+    """
+    def next(self):
+        if hasattr(self, '__next__'):
+            return type(self).__next__(self)
+        raise TypeError('newobject is not an iterator')
+
+    def __unicode__(self):
+        # All subclasses of the builtin object should have __str__ defined.
+        # Note that old-style classes do not have __str__ defined.
+        if hasattr(self, '__str__'):
+            s = type(self).__str__(self)
+        else:
+            s = str(self)
+        if isinstance(s, unicode):
+            return s
+        else:
+            return s.decode('utf-8')
+
+    def __nonzero__(self):
+        if hasattr(self, '__bool__'):
+            return type(self).__bool__(self)
+        if hasattr(self, '__len__'):
+            return type(self).__len__(self)
+        # object has no __nonzero__ method
+        return True
+
+    # Are these ever needed?
+ # def __div__(self): + # return self.__truediv__() + + # def __idiv__(self, other): + # return self.__itruediv__(other) + + def __long__(self): + if not hasattr(self, '__int__'): + return NotImplemented + return self.__int__() # not type(self).__int__(self) + + # def __new__(cls, *args, **kwargs): + # """ + # dict() -> new empty dictionary + # dict(mapping) -> new dictionary initialized from a mapping object's + # (key, value) pairs + # dict(iterable) -> new dictionary initialized as if via: + # d = {} + # for k, v in iterable: + # d[k] = v + # dict(**kwargs) -> new dictionary initialized with the name=value pairs + # in the keyword argument list. For example: dict(one=1, two=2) + # """ + + # if len(args) == 0: + # return super(newdict, cls).__new__(cls) + # elif type(args[0]) == newdict: + # return args[0] + # else: + # value = args[0] + # return super(newdict, cls).__new__(cls, value) + + def __native__(self): + """ + Hook for the future.utils.native() function + """ + return object(self) + + __slots__ = [] + +__all__ = ['newobject'] diff --git a/future/builtins/backports/newopen.py b/src/future/types/newopen.py similarity index 99% rename from future/builtins/backports/newopen.py rename to src/future/types/newopen.py index 8da06427..b75d45af 100644 --- a/future/builtins/backports/newopen.py +++ b/src/future/types/newopen.py @@ -30,4 +30,3 @@ def __enter__(self): def __exit__(self, etype, value, traceback): self.f.close() - diff --git a/future/builtins/backports/newrange.py b/src/future/types/newrange.py similarity index 61% rename from future/builtins/backports/newrange.py rename to src/future/types/newrange.py index 815df769..dc5eb802 100644 --- a/future/builtins/backports/newrange.py +++ b/src/future/types/newrange.py @@ -1,5 +1,5 @@ """ -Nearly identical to xrange.py, by Dan Crosta, from +Nearly identical to xrange.py, by Dan Crosta, from https://github.com/dcrosta/xrange.git @@ -17,18 +17,26 @@ Read more at 
https://late.am/post/2012/06/18/what-the-heck-is-an-xrange """ +from __future__ import absolute_import -from math import ceil -from collections import Sequence, Iterator +from future.utils import PY2 -from future.utils import PY3 +if PY2: + from collections import Sequence, Iterator +else: + from collections.abc import Sequence, Iterator +from itertools import islice + +from future.backports.misc import count # with step parameter on Py2.6 +# For backward compatibility with python-future versions < 0.14.4: +_count = count class newrange(Sequence): """ Pure-Python backport of Python 3's range object. See `the CPython documentation for details: - `_ + `_ """ def __init__(self, *args): @@ -58,18 +66,28 @@ def __init__(self, *args): self._step = step self._len = (stop - start) // step + bool((stop - start) % step) + @property + def start(self): + return self._start + + @property + def stop(self): + return self._stop + + @property + def step(self): + return self._step + def __repr__(self): - if self._start == 0 and self._step == 1: - return 'range(%d)' % self._stop - elif self._step == 1: + if self._step == 1: return 'range(%d, %d)' % (self._start, self._stop) return 'range(%d, %d, %d)' % (self._start, self._stop, self._step) def __eq__(self, other): - return isinstance(other, newrange) and \ - self._start == other._start and \ - self._stop == other._stop and \ - self._step == other._step + return (isinstance(other, newrange) and + (self._len == 0 == other._len or + (self._start, self._step, self._len) == + (other._start, other._step, other._len))) def __len__(self): return self._len @@ -77,14 +95,17 @@ def __len__(self): def index(self, value): """Return the 0-based position of integer `value` in the sequence this range represents.""" - diff = value - self._start + try: + diff = value - self._start + except TypeError: + raise ValueError('%r is not in range' % value) quotient, remainder = divmod(diff, self._step) if remainder == 0 and 0 <= quotient < self._len: return 
abs(quotient) raise ValueError('%r is not in range' % value) def count(self, value): - """Return the number of ocurrences of integer `value` + """Return the number of occurrences of integer `value` in the sequence this range represents.""" # a value can occur exactly zero or one times return int(value in self) @@ -99,12 +120,7 @@ def __contains__(self, value): return False def __reversed__(self): - """Return a range which represents a sequence whose - contents are the same as the sequence this range - represents, but in the opposite order.""" - sign = self._step / abs(self._step) - last = self._start + ((self._len - 1) * self._step) - return newrange(last, self._start - sign, -1 * self._step) + return iter(self[::-1]) def __getitem__(self, index): """Return the element at position ``index`` in the sequence @@ -123,56 +139,32 @@ def __getitem_slice(self, slce): """Return a range which represents the requested slce of the sequence represented by this range. """ - start, stop, step = slce.start, slce.stop, slce.step - if step == 0: - raise ValueError('slice step cannot be 0') - - start = start or self._start - stop = stop or self._stop - if start < 0: - start = max(0, start + self._len) - if stop < 0: - stop = max(start, stop + self._len) - - if step is None or step > 0: - return newrange(start, stop, step or 1) - else: - rv = reversed(self) - rv._step = step - return rv + scaled_indices = (self._step * n for n in slce.indices(self._len)) + start_offset, stop_offset, new_step = scaled_indices + return newrange(self._start + start_offset, + self._start + stop_offset, + new_step) def __iter__(self): """Return an iterator which enumerates the elements of the sequence this range represents.""" - return rangeiterator(self) + return range_iterator(self) -class rangeiterator(Iterator): +class range_iterator(Iterator): """An iterator for a :class:`range`. 
""" - - def __init__(self, rangeobj): - self._range = rangeobj - - # Intialize the "last outputted value" to the value - # just before the first value; this simplifies next() - self._last = self._range._start - self._range._step - self._count = 0 + def __init__(self, range_): + self._stepper = islice(count(range_.start, range_.step), len(range_)) def __iter__(self): - """An iterator is already an iterator, so return ``self``. - """ return self + def __next__(self): + return next(self._stepper) + def next(self): - """Return the next element in the sequence represented - by the range we are iterating, or raise StopIteration - if we have passed the end of the sequence.""" - self._last += self._range._step - self._count += 1 - if self._count > self._range._len: - raise StopIteration() - return self._last + return next(self._stepper) __all__ = ['newrange'] diff --git a/future/builtins/backports/newstr.py b/src/future/types/newstr.py similarity index 53% rename from future/builtins/backports/newstr.py rename to src/future/types/newstr.py index 3fb48600..8ca191f9 100644 --- a/future/builtins/backports/newstr.py +++ b/src/future/types/newstr.py @@ -9,7 +9,7 @@ as follows: >>> from __future__ import unicode_literals - >>> from future.builtins import str, isinstance + >>> from builtins import str, isinstance On Python 3.x and normally on Python 2.x, these expressions hold @@ -37,24 +37,30 @@ ``__unicode__`` method on objects in Python 2. To define string representations of your objects portably across Py3 and Py2, use the :func:`python_2_unicode_compatible` decorator in :mod:`future.utils`. - -""" -from collections import Iterable +""" from numbers import Number + from future.utils import PY3, istext, with_metaclass, isnewbytes -from future.builtins.backports import no, issubset +from future.types import no, issubset +from future.types.newobject import newobject if PY3: # We'll probably never use newstr on Py3 anyway... 
unicode = str + from collections.abc import Iterable +else: + from collections import Iterable class BaseNewStr(type): def __instancecheck__(cls, instance): - return isinstance(instance, unicode) + if cls == newstr: + return isinstance(instance, unicode) + else: + return issubclass(instance.__class__, cls) class newstr(with_metaclass(BaseNewStr, unicode)): @@ -69,7 +75,7 @@ def __new__(cls, *args, **kwargs): str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str - + Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. @@ -77,16 +83,14 @@ def __new__(cls, *args, **kwargs): or repr(object). encoding defaults to sys.getdefaultencoding(). errors defaults to 'strict'. - + """ - if len(args) == 0: return super(newstr, cls).__new__(cls) - # Was: elif isinstance(args[0], newstr): - # We use type() instead of the above because we're redefining - # this to be True for all unicode string subclasses. Warning: - # This may render newstr un-subclassable. - elif type(args[0]) == newstr: + # Special case: If someone requests str(str(u'abc')), return the same + # object (same id) for consistency with Py3.3. This is not true for + # other objects like list or dict. + elif type(args[0]) == newstr and cls == newstr: return args[0] elif isinstance(args[0], unicode): value = args[0] @@ -98,16 +102,22 @@ def __new__(cls, *args, **kwargs): else: value = args[0] return super(newstr, cls).__new__(cls, value) - + def __repr__(self): """ Without the u prefix """ + value = super(newstr, self).__repr__() # assert value[0] == u'u' return value[1:] def __getitem__(self, y): + """ + Warning: Python <= 2.7.6 has a bug that causes this method never to be called + when y is a slice object. Therefore the type of newstr()[:2] is wrong + (unicode instead of newstr). 
+ """ return newstr(super(newstr, self).__getitem__(y)) def __contains__(self, key): @@ -121,7 +131,7 @@ def __contains__(self, key): else: raise TypeError(errmsg.format(type(key))) return issubset(list(newkey), list(self)) - + @no('newbytes') def __add__(self, other): return newstr(super(newstr, self).__add__(other)) @@ -148,7 +158,11 @@ def join(self, iterable): # isinstance(b'abc', newbytes) is True on Py2. if isnewbytes(item): raise TypeError(errmsg.format(i)) - return newstr(super(newstr, self).join(iterable)) + # Support use as a staticmethod: str.join('-', ['a', 'b']) + if type(self) == newstr: + return newstr(super(newstr, self).join(iterable)) + else: + return newstr(super(newstr, newstr(self)).join(iterable)) @no('newbytes') def find(self, sub, *args): @@ -176,9 +190,34 @@ def encode(self, encoding='utf-8', errors='strict'): 'xmlcharrefreplace' as well as any other name registered with codecs.register_error that can handle UnicodeEncodeErrors. """ - from future.builtins.backports.newbytes import newbytes + from future.types.newbytes import newbytes # Py2 unicode.encode() takes encoding and errors as optional parameter, # not keyword arguments as in Python 3 str. + + # For the surrogateescape error handling mechanism, the + # codecs.register_error() function seems to be inadequate for an + # implementation of it when encoding. (Decoding seems fine, however.) + # For example, in the case of + # u'\udcc3'.encode('ascii', 'surrogateescape_handler') + # after registering the ``surrogateescape_handler`` function in + # future.utils.surrogateescape, both Python 2.x and 3.x raise an + # exception anyway after the function is called because the unicode + # string it has to return isn't encodable strictly as ASCII. + + if errors == 'surrogateescape': + if encoding == 'utf-16': + # Known to fail here. 
See test_encoding_works_normally() + raise NotImplementedError('FIXME: surrogateescape handling is ' + 'not yet implemented properly') + # Encode char by char, building up list of byte-strings + mybytes = [] + for c in self: + code = ord(c) + if 0xD800 <= code <= 0xDCFF: + mybytes.append(newbytes([code - 0xDC00])) + else: + mybytes.append(c.encode(encoding=encoding)) + return newbytes(b'').join(mybytes) return newbytes(super(newstr, self).encode(encoding, errors)) @no('newbytes', 1) @@ -236,12 +275,32 @@ def index(self, sub, *args): raise ValueError('substring not found') return pos + def splitlines(self, keepends=False): + """ + S.splitlines(keepends=False) -> list of strings + + Return a list of the lines in S, breaking at line boundaries. + Line breaks are not included in the resulting list unless keepends + is given and true. + """ + # Py2 unicode.splitlines() takes keepends as an optional parameter, + # not as a keyword argument as in Python 3 str. + parts = super(newstr, self).splitlines(keepends) + return [newstr(part) for part in parts] + def __eq__(self, other): if (isinstance(other, unicode) or isinstance(other, bytes) and not isnewbytes(other)): return super(newstr, self).__eq__(other) else: - return False + return NotImplemented + + def __hash__(self): + if (isinstance(self, unicode) or + isinstance(self, bytes) and not isnewbytes(self)): + return super(newstr, self).__hash__() + else: + raise NotImplementedError() def __ne__(self, other): if (isinstance(other, unicode) or @@ -253,27 +312,115 @@ def __ne__(self, other): unorderable_err = 'unorderable types: str() and {0}' def __lt__(self, other): - if not istext(other): - raise TypeError(self.unorderable_err.format(type(other))) - return super(newstr, self).__lt__(other) + if (isinstance(other, unicode) or + isinstance(other, bytes) and not isnewbytes(other)): + return super(newstr, self).__lt__(other) + raise TypeError(self.unorderable_err.format(type(other))) def __le__(self, other): - if not 
istext(other): - raise TypeError(self.unorderable_err.format(type(other))) - return super(newstr, self).__le__(other) + if (isinstance(other, unicode) or + isinstance(other, bytes) and not isnewbytes(other)): + return super(newstr, self).__le__(other) + raise TypeError(self.unorderable_err.format(type(other))) def __gt__(self, other): - if not istext(other): - raise TypeError(self.unorderable_err.format(type(other))) - return super(newstr, self).__gt__(other) + if (isinstance(other, unicode) or + isinstance(other, bytes) and not isnewbytes(other)): + return super(newstr, self).__gt__(other) + raise TypeError(self.unorderable_err.format(type(other))) def __ge__(self, other): - if not istext(other): - raise TypeError(self.unorderable_err.format(type(other))) - return super(newstr, self).__ge__(other) + if (isinstance(other, unicode) or + isinstance(other, bytes) and not isnewbytes(other)): + return super(newstr, self).__ge__(other) + raise TypeError(self.unorderable_err.format(type(other))) + + def __getattribute__(self, name): + """ + A trick to cause the ``hasattr`` builtin-fn to return False for + the 'decode' method on Py2. + """ + if name in ['decode', u'decode']: + raise AttributeError("decode method has been disabled in newstr") + return super(newstr, self).__getattribute__(name) def __native__(self): + """ + A hook for the future.utils.native() function. + """ return unicode(self) + @staticmethod + def maketrans(x, y=None, z=None): + """ + Return a translation table usable for str.translate(). + + If there is only one argument, it must be a dictionary mapping Unicode + ordinals (integers) or characters to Unicode ordinals, strings or None. + Character keys will be then converted to ordinals. + If there are two arguments, they must be strings of equal length, and + in the resulting dictionary, each character in x will be mapped to the + character at the same position in y. 
If there is a third argument, it + must be a string, whose characters will be mapped to None in the result. + """ + + if y is None: + assert z is None + if not isinstance(x, dict): + raise TypeError('if you give only one argument to maketrans it must be a dict') + result = {} + for (key, value) in x.items(): + if len(key) > 1: + raise ValueError('keys in translate table must be strings or integers') + result[ord(key)] = value + else: + if not isinstance(x, unicode) and isinstance(y, unicode): + raise TypeError('x and y must be unicode strings') + if not len(x) == len(y): + raise ValueError('the first two maketrans arguments must have equal length') + result = {} + for (xi, yi) in zip(x, y): + if len(xi) > 1: + raise ValueError('keys in translate table must be strings or integers') + result[ord(xi)] = ord(yi) + + if z is not None: + for char in z: + result[ord(char)] = None + return result + + def translate(self, table): + """ + S.translate(table) -> str + + Return a copy of the string S, where all characters have been mapped + through the given translation table, which must be a mapping of + Unicode ordinals to Unicode ordinals, strings, or None. + Unmapped characters are left untouched. Characters mapped to None + are deleted. + """ + l = [] + for c in self: + if ord(c) in table: + val = table[ord(c)] + if val is None: + continue + elif isinstance(val, unicode): + l.append(val) + else: + l.append(chr(val)) + else: + l.append(c) + return ''.join(l) + + def isprintable(self): + raise NotImplementedError('fixme') + + def isidentifier(self): + raise NotImplementedError('fixme') + + def format_map(self): + raise NotImplementedError('fixme') + __all__ = ['newstr'] diff --git a/src/future/utils/__init__.py b/src/future/utils/__init__.py new file mode 100644 index 00000000..ec1b1027 --- /dev/null +++ b/src/future/utils/__init__.py @@ -0,0 +1,770 @@ +""" +A selection of cross-compatible functions for Python 2 and 3. 
+ +This module exports useful functions for 2/3 compatible code: + + * bind_method: binds functions to classes + * ``native_str_to_bytes`` and ``bytes_to_native_str`` + * ``native_str``: always equal to the native platform string object (because + this may be shadowed by imports from future.builtins) + * lists: lrange(), lmap(), lzip(), lfilter() + * iterable method compatibility: + - iteritems, iterkeys, itervalues + - viewitems, viewkeys, viewvalues + + These use the original method if available, otherwise they use items, + keys, values. + + * types: + + * text_type: unicode in Python 2, str in Python 3 + * string_types: basestring in Python 2, str in Python 3 + * binary_type: str in Python 2, bytes in Python 3 + * integer_types: (int, long) in Python 2, int in Python 3 + * class_types: (type, types.ClassType) in Python 2, type in Python 3 + + * bchr(c): + Take an integer and make a 1-character byte string + * bord(c) + Take the result of indexing on a byte string and make an integer + * tobytes(s) + Take a text string, a byte string, or a sequence of characters taken + from a byte string, and make a byte string. 
+ + * raise_from() + * raise_with_traceback() + +This module also defines these decorators: + + * ``python_2_unicode_compatible`` + * ``with_metaclass`` + * ``implements_iterator`` + +Some of the functions in this module come from the following sources: + + * Jinja2 (BSD licensed: see + https://github.com/mitsuhiko/jinja2/blob/master/LICENSE) + * Pandas compatibility module pandas.compat + * six.py by Benjamin Peterson + * Django +""" + +import types +import sys +import numbers +import functools +import copy +import inspect + + +PY3 = sys.version_info[0] >= 3 +PY34_PLUS = sys.version_info[0:2] >= (3, 4) +PY35_PLUS = sys.version_info[0:2] >= (3, 5) +PY36_PLUS = sys.version_info[0:2] >= (3, 6) +PY37_PLUS = sys.version_info[0:2] >= (3, 7) +PY38_PLUS = sys.version_info[0:2] >= (3, 8) +PY39_PLUS = sys.version_info[0:2] >= (3, 9) +PY2 = sys.version_info[0] == 2 +PY26 = sys.version_info[0:2] == (2, 6) +PY27 = sys.version_info[0:2] == (2, 7) +PYPY = hasattr(sys, 'pypy_translation_info') + + +def python_2_unicode_compatible(cls): + """ + A decorator that defines __unicode__ and __str__ methods under Python + 2. Under Python 3, this decorator is a no-op. + + To support Python 2 and 3 with a single code base, define a __str__ + method returning unicode text and apply this decorator to the class, like + this:: + + >>> from future.utils import python_2_unicode_compatible + + >>> @python_2_unicode_compatible + ... class MyClass(object): + ... def __str__(self): + ... return u'Unicode string: \u5b54\u5b50' + + >>> a = MyClass() + + Then, after this import: + + >>> from future.builtins import str + + the following is ``True`` on both Python 3 and 2:: + + >>> str(a) == a.encode('utf-8').decode('utf-8') + True + + and, on a Unicode-enabled terminal with the right fonts, these both print the + Chinese characters for Confucius:: + + >>> print(a) + >>> print(str(a)) + + The implementation comes from django.utils.encoding. 
+ """ + if not PY3: + cls.__unicode__ = cls.__str__ + cls.__str__ = lambda self: self.__unicode__().encode('utf-8') + return cls + + +def with_metaclass(meta, *bases): + """ + Function from jinja2/_compat.py. License: BSD. + + Use it like this:: + + class BaseForm(object): + pass + + class FormType(type): + pass + + class Form(with_metaclass(FormType, BaseForm)): + pass + + This requires a bit of explanation: the basic idea is to make a + dummy metaclass for one level of class instantiation that replaces + itself with the actual metaclass. Because of internal type checks + we also need to make sure that we downgrade the custom metaclass + for one level to something closer to type (that's why __call__ and + __init__ comes back from type etc.). + + This has the advantage over six.with_metaclass of not introducing + dummy classes into the final MRO. + """ + class metaclass(meta): + __call__ = type.__call__ + __init__ = type.__init__ + def __new__(cls, name, this_bases, d): + if this_bases is None: + return type.__new__(cls, name, (), d) + return meta(name, bases, d) + return metaclass('temporary_class', None, {}) + + +# Definitions from pandas.compat and six.py follow: +if PY3: + def bchr(s): + return bytes([s]) + def bstr(s): + if isinstance(s, str): + return bytes(s, 'latin-1') + else: + return bytes(s) + def bord(s): + return s + + string_types = str, + integer_types = int, + class_types = type, + text_type = str + binary_type = bytes + +else: + # Python 2 + def bchr(s): + return chr(s) + def bstr(s): + return str(s) + def bord(s): + return ord(s) + + string_types = basestring, + integer_types = (int, long) + class_types = (type, types.ClassType) + text_type = unicode + binary_type = str + +### + +if PY3: + def tobytes(s): + if isinstance(s, bytes): + return s + else: + if isinstance(s, str): + return s.encode('latin-1') + else: + return bytes(s) +else: + # Python 2 + def tobytes(s): + if isinstance(s, unicode): + return s.encode('latin-1') + else: + return 
''.join(s) + +tobytes.__doc__ = """ + Encodes to latin-1 (where the first 256 chars are the same as + ASCII.) + """ + +if PY3: + def native_str_to_bytes(s, encoding='utf-8'): + return s.encode(encoding) + + def bytes_to_native_str(b, encoding='utf-8'): + return b.decode(encoding) + + def text_to_native_str(t, encoding=None): + return t +else: + # Python 2 + def native_str_to_bytes(s, encoding=None): + from future.types import newbytes # to avoid a circular import + return newbytes(s) + + def bytes_to_native_str(b, encoding=None): + return native(b) + + def text_to_native_str(t, encoding='ascii'): + """ + Use this to create a Py2 native string when "from __future__ import + unicode_literals" is in effect. + """ + return unicode(t).encode(encoding) + +native_str_to_bytes.__doc__ = """ + On Py3, returns an encoded string. + On Py2, returns a newbytes type, ignoring the ``encoding`` argument. + """ + +if PY3: + # list-producing versions of the major Python iterating functions + def lrange(*args, **kwargs): + return list(range(*args, **kwargs)) + + def lzip(*args, **kwargs): + return list(zip(*args, **kwargs)) + + def lmap(*args, **kwargs): + return list(map(*args, **kwargs)) + + def lfilter(*args, **kwargs): + return list(filter(*args, **kwargs)) +else: + import __builtin__ + # Python 2-builtin ranges produce lists + lrange = __builtin__.range + lzip = __builtin__.zip + lmap = __builtin__.map + lfilter = __builtin__.filter + + +def isidentifier(s, dotted=False): + ''' + A function equivalent to the str.isidentifier method on Py3 + ''' + if dotted: + return all(isidentifier(a) for a in s.split('.')) + if PY3: + return s.isidentifier() + else: + import re + _name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*$") + return bool(_name_re.match(s)) + + +def viewitems(obj, **kwargs): + """ + Function for iterating over dictionary items with the same set-like + behaviour on Py2.7 as on Py3. 
+ + Passes kwargs to method.""" + func = getattr(obj, "viewitems", None) + if not func: + func = obj.items + return func(**kwargs) + + +def viewkeys(obj, **kwargs): + """ + Function for iterating over dictionary keys with the same set-like + behaviour on Py2.7 as on Py3. + + Passes kwargs to method.""" + func = getattr(obj, "viewkeys", None) + if not func: + func = obj.keys + return func(**kwargs) + + +def viewvalues(obj, **kwargs): + """ + Function for iterating over dictionary values with the same set-like + behaviour on Py2.7 as on Py3. + + Passes kwargs to method.""" + func = getattr(obj, "viewvalues", None) + if not func: + func = obj.values + return func(**kwargs) + + +def iteritems(obj, **kwargs): + """Use this only if compatibility with Python versions before 2.7 is + required. Otherwise, prefer viewitems(). + """ + func = getattr(obj, "iteritems", None) + if not func: + func = obj.items + return func(**kwargs) + + +def iterkeys(obj, **kwargs): + """Use this only if compatibility with Python versions before 2.7 is + required. Otherwise, prefer viewkeys(). + """ + func = getattr(obj, "iterkeys", None) + if not func: + func = obj.keys + return func(**kwargs) + + +def itervalues(obj, **kwargs): + """Use this only if compatibility with Python versions before 2.7 is + required. Otherwise, prefer viewvalues(). + """ + func = getattr(obj, "itervalues", None) + if not func: + func = obj.values + return func(**kwargs) + + +def bind_method(cls, name, func): + """Bind a method to class, python 2 and python 3 compatible. 
+ + Parameters + ---------- + + cls : type + class to receive bound method + name : basestring + name of method on class instance + func : function + function to be bound as method + + Returns + ------- + None + """ + # only python 2 has an issue with bound/unbound methods + if not PY3: + setattr(cls, name, types.MethodType(func, None, cls)) + else: + setattr(cls, name, func) + + +def getexception(): + return sys.exc_info()[1] + + +def _get_caller_globals_and_locals(): + """ + Returns the globals and locals of the calling frame. + + Is there an alternative to frame hacking here? + """ + caller_frame = inspect.stack()[2] + myglobals = caller_frame[0].f_globals + mylocals = caller_frame[0].f_locals + return myglobals, mylocals + + +def _repr_strip(mystring): + """ + Returns the string without any initial or final quotes. + """ + r = repr(mystring) + if r.startswith("'") and r.endswith("'"): + return r[1:-1] + else: + return r + + +if PY3: + def raise_from(exc, cause): + """ + Equivalent to: + + raise EXCEPTION from CAUSE + + on Python 3. (See PEP 3134). + """ + myglobals, mylocals = _get_caller_globals_and_locals() + + # We pass the exception and cause along with other globals + # when we exec(): + myglobals = myglobals.copy() + myglobals['__python_future_raise_from_exc'] = exc + myglobals['__python_future_raise_from_cause'] = cause + execstr = "raise __python_future_raise_from_exc from __python_future_raise_from_cause" + exec(execstr, myglobals, mylocals) + + def raise_(tp, value=None, tb=None): + """ + A function that matches the Python 2.x ``raise`` statement. This + allows re-raising exceptions with the cls value and traceback on + Python 2 and 3. + """ + if isinstance(tp, BaseException): + # If the first object is an instance, the type of the exception + # is the class of the instance, the instance itself is the value, + # and the second object must be None. 
+ if value is not None: + raise TypeError("instance exception may not have a separate value") + exc = tp + elif isinstance(tp, type) and not issubclass(tp, BaseException): + # If the first object is a class, it becomes the type of the + # exception. + raise TypeError("class must derive from BaseException, not %s" % tp.__name__) + else: + # The second object is used to determine the exception value: If it + # is an instance of the class, the instance becomes the exception + # value. If the second object is a tuple, it is used as the argument + # list for the class constructor; if it is None, an empty argument + # list is used, and any other object is treated as a single argument + # to the constructor. The instance so created by calling the + # constructor is used as the exception value. + if isinstance(value, tp): + exc = value + elif isinstance(value, tuple): + exc = tp(*value) + elif value is None: + exc = tp() + else: + exc = tp(value) + + if exc.__traceback__ is not tb: + raise exc.with_traceback(tb) + raise exc + + def raise_with_traceback(exc, traceback=Ellipsis): + if traceback == Ellipsis: + _, _, traceback = sys.exc_info() + raise exc.with_traceback(traceback) + +else: + def raise_from(exc, cause): + """ + Equivalent to: + + raise EXCEPTION from CAUSE + + on Python 3. (See PEP 3134). + """ + # Is either arg an exception class (e.g. IndexError) rather than + # instance (e.g. IndexError('my message here')? If so, pass the + # name of the class undisturbed through to "raise ... from ...". 
+ if isinstance(exc, type) and issubclass(exc, Exception): + e = exc() + # exc = exc.__name__ + # execstr = "e = " + _repr_strip(exc) + "()" + # myglobals, mylocals = _get_caller_globals_and_locals() + # exec(execstr, myglobals, mylocals) + else: + e = exc + e.__suppress_context__ = False + if isinstance(cause, type) and issubclass(cause, Exception): + e.__cause__ = cause() + e.__cause__.__traceback__ = sys.exc_info()[2] + e.__suppress_context__ = True + elif cause is None: + e.__cause__ = None + e.__suppress_context__ = True + elif isinstance(cause, BaseException): + e.__cause__ = cause + object.__setattr__(e.__cause__, '__traceback__', sys.exc_info()[2]) + e.__suppress_context__ = True + else: + raise TypeError("exception causes must derive from BaseException") + e.__context__ = sys.exc_info()[1] + raise e + + exec(''' +def raise_(tp, value=None, tb=None): + raise tp, value, tb + +def raise_with_traceback(exc, traceback=Ellipsis): + if traceback == Ellipsis: + _, _, traceback = sys.exc_info() + raise exc, None, traceback +'''.strip()) + + +raise_with_traceback.__doc__ = ( +"""Raise exception with existing traceback. +If traceback is not passed, uses sys.exc_info() to get traceback.""" +) + + +# Deprecated alias for backward compatibility with ``future`` versions < 0.11: +reraise = raise_ + + +def implements_iterator(cls): + ''' + From jinja2/_compat.py. License: BSD. 
+ + Use as a decorator like this:: + + @implements_iterator + class UppercasingIterator(object): + def __init__(self, iterable): + self._iter = iter(iterable) + def __iter__(self): + return self + def __next__(self): + return next(self._iter).upper() + + ''' + if PY3: + return cls + else: + cls.next = cls.__next__ + del cls.__next__ + return cls + +if PY3: + get_next = lambda x: x.__next__ +else: + get_next = lambda x: x.next + + +def encode_filename(filename): + if PY3: + return filename + else: + if isinstance(filename, unicode): + return filename.encode('utf-8') + return filename + + +def is_new_style(cls): + """ + Python 2.7 has both new-style and old-style classes. Old-style classes can + be pesky in some circumstances, such as when using inheritance. Use this + function to test for whether a class is new-style. (Python 3 only has + new-style classes.) + """ + return hasattr(cls, '__class__') and ('__dict__' in dir(cls) + or hasattr(cls, '__slots__')) + +# The native platform string and bytes types. Useful because ``str`` and +# ``bytes`` are redefined on Py2 by ``from future.builtins import *``. +native_str = str +native_bytes = bytes + + +def istext(obj): + """ + Deprecated. Use:: + >>> isinstance(obj, str) + after this import: + >>> from future.builtins import str + """ + return isinstance(obj, type(u'')) + + +def isbytes(obj): + """ + Deprecated. Use:: + >>> isinstance(obj, bytes) + after this import: + >>> from future.builtins import bytes + """ + return isinstance(obj, type(b'')) + + +def isnewbytes(obj): + """ + Equivalent to the result of ``type(obj) == type(newbytes)`` + in other words, it is REALLY a newbytes instance, not a Py2 native str + object? + + Note that this does not cover subclasses of newbytes, and it is not + equivalent to ininstance(obj, newbytes) + """ + return type(obj).__name__ == 'newbytes' + + +def isint(obj): + """ + Deprecated. Tests whether an object is a Py3 ``int`` or either a Py2 ``int`` or + ``long``. 
+ + Instead of using this function, you can use: + + >>> from future.builtins import int + >>> isinstance(obj, int) + + The following idiom is equivalent: + + >>> from numbers import Integral + >>> isinstance(obj, Integral) + """ + + return isinstance(obj, numbers.Integral) + + +def native(obj): + """ + On Py3, this is a no-op: native(obj) -> obj + + On Py2, returns the corresponding native Py2 types that are + superclasses for backported objects from Py3: + + >>> from builtins import str, bytes, int + + >>> native(str(u'ABC')) + u'ABC' + >>> type(native(str(u'ABC'))) + unicode + + >>> native(bytes(b'ABC')) + b'ABC' + >>> type(native(bytes(b'ABC'))) + bytes + + >>> native(int(10**20)) + 100000000000000000000L + >>> type(native(int(10**20))) + long + + Existing native types on Py2 will be returned unchanged: + + >>> type(native(u'ABC')) + unicode + """ + if hasattr(obj, '__native__'): + return obj.__native__() + else: + return obj + + +# Implementation of exec_ is from ``six``: +if PY3: + import builtins + exec_ = getattr(builtins, "exec") +else: + def exec_(code, globs=None, locs=None): + """Execute code in a namespace.""" + if globs is None: + frame = sys._getframe(1) + globs = frame.f_globals + if locs is None: + locs = frame.f_locals + del frame + elif locs is None: + locs = globs + exec("""exec code in globs, locs""") + + +# Defined here for backward compatibility: +def old_div(a, b): + """ + DEPRECATED: import ``old_div`` from ``past.utils`` instead. + + Equivalent to ``a / b`` on Python 2 without ``from __future__ import + division``. + + TODO: generalize this to other objects (like arrays etc.) + """ + if isinstance(a, numbers.Integral) and isinstance(b, numbers.Integral): + return a // b + else: + return a / b + + +def as_native_str(encoding='utf-8'): + ''' + A decorator to turn a function or method call that returns text, i.e. + unicode, into one that returns a native platform str. 
+ + Use it as a decorator like this:: + + from __future__ import unicode_literals + + class MyClass(object): + @as_native_str(encoding='ascii') + def __repr__(self): + return next(self._iter).upper() + ''' + if PY3: + return lambda f: f + else: + def encoder(f): + @functools.wraps(f) + def wrapper(*args, **kwargs): + return f(*args, **kwargs).encode(encoding=encoding) + return wrapper + return encoder + +# listvalues and listitems definitions from Nick Coghlan's (withdrawn) +# PEP 496: +try: + dict.iteritems +except AttributeError: + # Python 3 + def listvalues(d): + return list(d.values()) + def listitems(d): + return list(d.items()) +else: + # Python 2 + def listvalues(d): + return d.values() + def listitems(d): + return d.items() + +if PY3: + def ensure_new_type(obj): + return obj +else: + def ensure_new_type(obj): + from future.types.newbytes import newbytes + from future.types.newstr import newstr + from future.types.newint import newint + from future.types.newdict import newdict + + native_type = type(native(obj)) + + # Upcast only if the type is already a native (non-future) type + if issubclass(native_type, type(obj)): + # Upcast + if native_type == str: # i.e. 
Py2 8-bit str + return newbytes(obj) + elif native_type == unicode: + return newstr(obj) + elif native_type == int: + return newint(obj) + elif native_type == long: + return newint(obj) + elif native_type == dict: + return newdict(obj) + else: + return obj + else: + # Already a new type + assert type(obj) in [newbytes, newstr] + return obj + + +__all__ = ['PY2', 'PY26', 'PY3', 'PYPY', + 'as_native_str', 'binary_type', 'bind_method', 'bord', 'bstr', + 'bytes_to_native_str', 'class_types', 'encode_filename', + 'ensure_new_type', 'exec_', 'get_next', 'getexception', + 'implements_iterator', 'integer_types', 'is_new_style', 'isbytes', + 'isidentifier', 'isint', 'isnewbytes', 'istext', 'iteritems', + 'iterkeys', 'itervalues', 'lfilter', 'listitems', 'listvalues', + 'lmap', 'lrange', 'lzip', 'native', 'native_bytes', 'native_str', + 'native_str_to_bytes', 'old_div', + 'python_2_unicode_compatible', 'raise_', + 'raise_with_traceback', 'reraise', 'string_types', + 'text_to_native_str', 'text_type', 'tobytes', 'viewitems', + 'viewkeys', 'viewvalues', 'with_metaclass' + ] diff --git a/src/future/utils/surrogateescape.py b/src/future/utils/surrogateescape.py new file mode 100644 index 00000000..0dcc9fa6 --- /dev/null +++ b/src/future/utils/surrogateescape.py @@ -0,0 +1,198 @@ +""" +This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error +handler of Python 3. 
+ +Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc +""" + +# This code is released under the Python license and the BSD 2-clause license + +import codecs +import sys + +from future import utils + + +FS_ERRORS = 'surrogateescape' + +# # -- Python 2/3 compatibility ------------------------------------- +# FS_ERRORS = 'my_surrogateescape' + +def u(text): + if utils.PY3: + return text + else: + return text.decode('unicode_escape') + +def b(data): + if utils.PY3: + return data.encode('latin1') + else: + return data + +if utils.PY3: + _unichr = chr + bytes_chr = lambda code: bytes((code,)) +else: + _unichr = unichr + bytes_chr = chr + +def surrogateescape_handler(exc): + """ + Pure Python implementation of the PEP 383: the "surrogateescape" error + handler of Python 3. Undecodable bytes will be replaced by a Unicode + character U+DCxx on decoding, and these are translated into the + original bytes on encoding. + """ + mystring = exc.object[exc.start:exc.end] + + try: + if isinstance(exc, UnicodeDecodeError): + # mystring is a byte-string in this case + decoded = replace_surrogate_decode(mystring) + elif isinstance(exc, UnicodeEncodeError): + # In the case of u'\udcc3'.encode('ascii', + # 'this_surrogateescape_handler'), both Python 2.x and 3.x raise an + # exception anyway after this function is called, even though I think + # it's doing what it should. It seems that the strict encoder is called + # to encode the unicode string that this function returns ... + decoded = replace_surrogate_encode(mystring) + else: + raise exc + except NotASurrogateError: + raise exc + return (decoded, exc.end) + + +class NotASurrogateError(Exception): + pass + + +def replace_surrogate_encode(mystring): + """ + Returns a (unicode) string, not the more logical bytes, because the codecs + register_error functionality expects this. 
+ """ + decoded = [] + for ch in mystring: + # if utils.PY3: + # code = ch + # else: + code = ord(ch) + + # The following magic comes from Py3.3's Python/codecs.c file: + if not 0xD800 <= code <= 0xDCFF: + # Not a surrogate. Fail with the original exception. + raise NotASurrogateError + # mybytes = [0xe0 | (code >> 12), + # 0x80 | ((code >> 6) & 0x3f), + # 0x80 | (code & 0x3f)] + # Is this a good idea? + if 0xDC00 <= code <= 0xDC7F: + decoded.append(_unichr(code - 0xDC00)) + elif code <= 0xDCFF: + decoded.append(_unichr(code - 0xDC00)) + else: + raise NotASurrogateError + return str().join(decoded) + + +def replace_surrogate_decode(mybytes): + """ + Returns a (unicode) string + """ + decoded = [] + for ch in mybytes: + # We may be parsing newbytes (in which case ch is an int) or a native + # str on Py2 + if isinstance(ch, int): + code = ch + else: + code = ord(ch) + if 0x80 <= code <= 0xFF: + decoded.append(_unichr(0xDC00 + code)) + elif code <= 0x7F: + decoded.append(_unichr(code)) + else: + # # It may be a bad byte + # # Try swallowing it. + # continue + # print("RAISE!") + raise NotASurrogateError + return str().join(decoded) + + +def encodefilename(fn): + if FS_ENCODING == 'ascii': + # ASCII encoder of Python 2 expects that the error handler returns a + # Unicode string encodable to ASCII, whereas our surrogateescape error + # handler has to return bytes in 0x80-0xFF range. 
+ encoded = [] + for index, ch in enumerate(fn): + code = ord(ch) + if code < 128: + ch = bytes_chr(code) + elif 0xDC80 <= code <= 0xDCFF: + ch = bytes_chr(code - 0xDC00) + else: + raise UnicodeEncodeError(FS_ENCODING, + fn, index, index+1, + 'ordinal not in range(128)') + encoded.append(ch) + return bytes().join(encoded) + elif FS_ENCODING == 'utf-8': + # UTF-8 encoder of Python 2 encodes surrogates, so U+DC80-U+DCFF + # doesn't go through our error handler + encoded = [] + for index, ch in enumerate(fn): + code = ord(ch) + if 0xD800 <= code <= 0xDFFF: + if 0xDC80 <= code <= 0xDCFF: + ch = bytes_chr(code - 0xDC00) + encoded.append(ch) + else: + raise UnicodeEncodeError( + FS_ENCODING, + fn, index, index+1, 'surrogates not allowed') + else: + ch_utf8 = ch.encode('utf-8') + encoded.append(ch_utf8) + return bytes().join(encoded) + else: + return fn.encode(FS_ENCODING, FS_ERRORS) + +def decodefilename(fn): + return fn.decode(FS_ENCODING, FS_ERRORS) + +FS_ENCODING = 'ascii'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]') +# FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]') +# FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]') + + +# normalize the filesystem encoding name. +# For example, we expect "utf-8", not "UTF8". 
+FS_ENCODING = codecs.lookup(FS_ENCODING).name + + +def register_surrogateescape(): + """ + Registers the surrogateescape error handler on Python 2 (only) + """ + if utils.PY3: + return + try: + codecs.lookup_error(FS_ERRORS) + except LookupError: + codecs.register_error(FS_ERRORS, surrogateescape_handler) + + +if __name__ == '__main__': + pass + # # Tests: + # register_surrogateescape() + + # b = decodefilename(fn) + # assert b == encoded, "%r != %r" % (b, encoded) + # c = encodefilename(b) + # assert c == fn, '%r != %r' % (c, fn) + # # print("ok") diff --git a/src/html/__init__.py b/src/html/__init__.py new file mode 100644 index 00000000..e957e745 --- /dev/null +++ b/src/html/__init__.py @@ -0,0 +1,9 @@ +from __future__ import absolute_import +import sys + +if sys.version_info[0] < 3: + from future.moves.html import * +else: + raise ImportError('This package should not be accessible on Python 3. ' + 'Either you are trying to run from the python-future src folder ' + 'or your installation of python-future is corrupted.') diff --git a/src/html/entities.py b/src/html/entities.py new file mode 100644 index 00000000..211649e5 --- /dev/null +++ b/src/html/entities.py @@ -0,0 +1,7 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from html.entities import * +else: + from future.moves.html.entities import * diff --git a/src/html/parser.py b/src/html/parser.py new file mode 100644 index 00000000..e3948879 --- /dev/null +++ b/src/html/parser.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import +import sys +__future_module__ = True + +if sys.version_info[0] >= 3: + raise ImportError('Cannot import module from python-future source folder') +else: + from future.moves.html.parser import * diff --git a/src/http/__init__.py b/src/http/__init__.py new file mode 100644 index 00000000..e4f853e5 --- /dev/null +++ b/src/http/__init__.py @@ -0,0 +1,9 @@ +from __future__ import absolute_import +import sys + +if sys.version_info[0] < 3: + pass 
+else: + raise ImportError('This package should not be accessible on Python 3. ' + 'Either you are trying to run from the python-future src folder ' + 'or your installation of python-future is corrupted.') diff --git a/src/http/client.py b/src/http/client.py new file mode 100644 index 00000000..a6a31006 --- /dev/null +++ b/src/http/client.py @@ -0,0 +1,90 @@ +from __future__ import absolute_import +import sys + +assert sys.version_info[0] < 3 + +from httplib import * +from httplib import HTTPMessage + +# These constants aren't included in __all__ in httplib.py: + +from httplib import (HTTP_PORT, + HTTPS_PORT, + + CONTINUE, + SWITCHING_PROTOCOLS, + PROCESSING, + + OK, + CREATED, + ACCEPTED, + NON_AUTHORITATIVE_INFORMATION, + NO_CONTENT, + RESET_CONTENT, + PARTIAL_CONTENT, + MULTI_STATUS, + IM_USED, + + MULTIPLE_CHOICES, + MOVED_PERMANENTLY, + FOUND, + SEE_OTHER, + NOT_MODIFIED, + USE_PROXY, + TEMPORARY_REDIRECT, + + BAD_REQUEST, + UNAUTHORIZED, + PAYMENT_REQUIRED, + FORBIDDEN, + NOT_FOUND, + METHOD_NOT_ALLOWED, + NOT_ACCEPTABLE, + PROXY_AUTHENTICATION_REQUIRED, + REQUEST_TIMEOUT, + CONFLICT, + GONE, + LENGTH_REQUIRED, + PRECONDITION_FAILED, + REQUEST_ENTITY_TOO_LARGE, + REQUEST_URI_TOO_LONG, + UNSUPPORTED_MEDIA_TYPE, + REQUESTED_RANGE_NOT_SATISFIABLE, + EXPECTATION_FAILED, + UNPROCESSABLE_ENTITY, + LOCKED, + FAILED_DEPENDENCY, + UPGRADE_REQUIRED, + + INTERNAL_SERVER_ERROR, + NOT_IMPLEMENTED, + BAD_GATEWAY, + SERVICE_UNAVAILABLE, + GATEWAY_TIMEOUT, + HTTP_VERSION_NOT_SUPPORTED, + INSUFFICIENT_STORAGE, + NOT_EXTENDED, + + MAXAMOUNT, + ) + +# These are not available on Python 2.6.x: +try: + from httplib import LineTooLong, LineAndFileWrapper +except ImportError: + pass + +# These may not be available on all versions of Python 2.6.x or 2.7.x +try: + from httplib import ( + _CS_IDLE, + _CS_REQ_STARTED, + _CS_REQ_SENT, + _MAXLINE, + _MAXHEADERS, + _is_legal_header_name, + _is_illegal_header_value, + _METHODS_EXPECTING_BODY + ) +except ImportError: + pass diff --git 
a/src/http/cookiejar.py b/src/http/cookiejar.py new file mode 100644 index 00000000..d847b2bf --- /dev/null +++ b/src/http/cookiejar.py @@ -0,0 +1,6 @@ +from __future__ import absolute_import +import sys + +assert sys.version_info[0] < 3 + +from cookielib import * diff --git a/src/http/cookies.py b/src/http/cookies.py new file mode 100644 index 00000000..eb2a8238 --- /dev/null +++ b/src/http/cookies.py @@ -0,0 +1,7 @@ +from __future__ import absolute_import +import sys + +assert sys.version_info[0] < 3 + +from Cookie import * +from Cookie import Morsel # left out of __all__ on Py2.7! diff --git a/src/http/server.py b/src/http/server.py new file mode 100644 index 00000000..29710557 --- /dev/null +++ b/src/http/server.py @@ -0,0 +1,18 @@ +from __future__ import absolute_import +import sys + +assert sys.version_info[0] < 3 + +from BaseHTTPServer import * +from CGIHTTPServer import * +from SimpleHTTPServer import * +try: + from CGIHTTPServer import _url_collapse_path # needed for a test +except ImportError: + try: + # Python 2.7.0 to 2.7.3 + from CGIHTTPServer import ( + _url_collapse_path_split as _url_collapse_path) + except ImportError: + # Doesn't exist on Python 2.6.x. Ignore it. 
+ pass diff --git a/libfuturize/__init__.py b/src/libfuturize/__init__.py similarity index 100% rename from libfuturize/__init__.py rename to src/libfuturize/__init__.py diff --git a/libfuturize/fixer_util.py b/src/libfuturize/fixer_util.py similarity index 65% rename from libfuturize/fixer_util.py rename to src/libfuturize/fixer_util.py index 16f52591..b5c123f6 100644 --- a/libfuturize/fixer_util.py +++ b/src/libfuturize/fixer_util.py @@ -9,11 +9,44 @@ """ from lib2to3.fixer_util import (FromImport, Newline, is_import, - find_root, does_tree_import) + find_root, does_tree_import, + Call, Name, Comma) from lib2to3.pytree import Leaf, Node -from lib2to3.pygram import python_symbols as syms, python_grammar -# from lib2to3.pgen2 import token +from lib2to3.pygram import python_symbols as syms from lib2to3.pygram import token +import re + + +def canonical_fix_name(fix, avail_fixes): + """ + Examples: + >>> canonical_fix_name('fix_wrap_text_literals') + 'libfuturize.fixes.fix_wrap_text_literals' + >>> canonical_fix_name('wrap_text_literals') + 'libfuturize.fixes.fix_wrap_text_literals' + >>> canonical_fix_name('wrap_te') + ValueError("unknown fixer name") + >>> canonical_fix_name('wrap') + ValueError("ambiguous fixer name") + """ + if ".fix_" in fix: + return fix + else: + if fix.startswith('fix_'): + fix = fix[4:] + # Infer the full module name for the fixer. + # First ensure that no names clash (e.g. + # lib2to3.fixes.fix_blah and libfuturize.fixes.fix_blah): + found = [f for f in avail_fixes + if f.endswith('fix_{0}'.format(fix))] + if len(found) > 1: + raise ValueError("Ambiguous fixer name. Choose a fully qualified " + "module name instead from these:\n" + + "\n".join(" " + myf for myf in found)) + elif len(found) == 0: + raise ValueError("Unknown fixer. 
Use --list-fixes or -l for a list.") + return found[0] + ## These functions are from 3to2 by Joe Amenta: @@ -28,8 +61,8 @@ def Minus(prefix=None): return Leaf(token.MINUS, u'-', prefix=prefix) def commatize(leafs): - u""" - Accepts/turns: (Name, Name, ..., Name, Name) + """ + Accepts/turns: (Name, Name, ..., Name, Name) Returns/into: (Name, Comma, Name, Comma, ..., Name, Comma, Name) """ new_leafs = [] @@ -40,7 +73,7 @@ def commatize(leafs): return new_leafs def indentation(node): - u""" + """ Returns the indentation for this node Iff a node is in a suite, then it has indentation. """ @@ -61,7 +94,7 @@ def indentation(node): return node.prefix def indentation_step(node): - u""" + """ Dirty little trick to get the difference between each indentation level Implemented by finding the shortest indentation string (technically, the "least" of all of the indentation strings, but @@ -77,13 +110,13 @@ def indentation_step(node): return min(all_indents) def suitify(parent): - u""" + """ Turn the stuff after the first colon in parent's children into a suite, if it wasn't already """ for node in parent.children: if node.type == syms.suite: - # already in the prefered format, do nothing + # already in the preferred format, do nothing return # One-liners have no suite node, we have to fake one up @@ -101,7 +134,7 @@ def suitify(parent): parent.append_child(suite) def NameImport(package, as_name=None, prefix=None): - u""" + """ Accepts a package (Name node), name to import it as (string), and optional prefix and returns a node: import [as ] @@ -118,7 +151,7 @@ def NameImport(package, as_name=None, prefix=None): _import_stmts = (syms.import_name, syms.import_from) def import_binding_scope(node): - u""" + """ Generator yields all nodes for which a node (an import_stmt) has scope The purpose of this is for a call to _find() on each of them """ @@ -186,6 +219,14 @@ def ImportAsName(name, as_name, prefix=None): return new_node +def is_docstring(node): + """ + Returns True if the node 
appears to be a docstring + """ + return (node.type == syms.simple_stmt and + len(node.children) > 0 and node.children[0].type == token.STRING) + + def future_import(feature, node): """ This seems to work @@ -195,9 +236,14 @@ def future_import(feature, node): if does_tree_import(u"__future__", feature, node): return + # Look for a shebang or encoding line + shebang_encoding_idx = None + for idx, node in enumerate(root.children): - if node.type == syms.simple_stmt and \ - len(node.children) > 0 and node.children[0].type == token.STRING: + # Is it a shebang or encoding line? + if is_shebang_comment(node) or is_encoding_comment(node): + shebang_encoding_idx = idx + if is_docstring(node): # skip over docstring continue names = check_future_import(node) @@ -209,7 +255,15 @@ def future_import(feature, node): return import_ = FromImport(u'__future__', [Leaf(token.NAME, feature, prefix=" ")]) - children = [import_, Newline()] + if shebang_encoding_idx == 0 and idx == 0: + # If this __future__ import would go on the first line, + # detach the shebang / encoding prefix from the current first line. + # and attach it to our new __future__ import node. + import_.prefix = root.children[0].prefix + root.children[0].prefix = u'' + # End the __future__ import line with a newline and add a blank line + # afterwards: + children = [import_ , Newline()] root.insert_child(idx, Node(syms.simple_stmt, children)) @@ -218,7 +272,7 @@ def future_import2(feature, node): An alternative to future_import() which might not work ... 
""" root = find_root(node) - + if does_tree_import(u"__future__", feature, node): return @@ -250,7 +304,7 @@ def parse_args(arglist, scheme): Parse a list of arguments into a dict """ arglist = [i for i in arglist if i.type != token.COMMA] - + ret_mapping = dict([(k, None) for k in scheme]) for i, arg in enumerate(arglist): @@ -278,17 +332,26 @@ def is_import_stmt(node): def touch_import_top(package, name_to_import, node): """Works like `does_tree_import` but adds an import statement at the - top if it was not imported (but below any __future__ imports). + top if it was not imported (but below any __future__ imports) and below any + comments such as shebang lines). - Calling this multiple times adds them in reverse order. - Based on lib2to3.fixer_util.touch_import() + + Calling this multiple times adds the imports in reverse order. + + Also adds "standard_library.install_aliases()" after "from future import + standard_library". This should probably be factored into another function. """ + root = find_root(node) if does_tree_import(package, name_to_import, root): return + # Ideally, we would look for whether futurize --all-imports has been run, + # as indicated by the presence of ``from builtins import (ascii, ..., + # zip)`` -- and, if it has, we wouldn't import the name again. + # Look for __future__ imports and insert below them found = False for name in ['absolute_import', 'division', 'print_function', @@ -316,12 +379,18 @@ def touch_import_top(package, name_to_import, node): assert end is not None insert_pos = end else: - # No __future__ imports + # No __future__ imports. + # We look for a docstring and insert the new node below that. If no docstring + # exists, just insert the node at the top. for idx, node in enumerate(root.children): - if node.type == syms.simple_stmt: # and node.children and node.children[0].type == token.STRING): + if node.type != syms.simple_stmt: + break + if not is_docstring(node): + # This is the usual case. 
break insert_pos = idx + children_hooks = [] if package is None: import_ = Node(syms.import_name, [ Leaf(token.NAME, u"import"), @@ -329,9 +398,31 @@ def touch_import_top(package, name_to_import, node): ]) else: import_ = FromImport(package, [Leaf(token.NAME, name_to_import, prefix=u" ")]) - - children = [import_, Newline()] - root.insert_child(insert_pos, Node(syms.simple_stmt, children)) + if name_to_import == u'standard_library': + # Add: + # standard_library.install_aliases() + # after: + # from future import standard_library + install_hooks = Node(syms.simple_stmt, + [Node(syms.power, + [Leaf(token.NAME, u'standard_library'), + Node(syms.trailer, [Leaf(token.DOT, u'.'), + Leaf(token.NAME, u'install_aliases')]), + Node(syms.trailer, [Leaf(token.LPAR, u'('), + Leaf(token.RPAR, u')')]) + ]) + ] + ) + children_hooks = [install_hooks, Newline()] + + # FromImport(package, [Leaf(token.NAME, name_to_import, prefix=u" ")]) + + children_import = [import_, Newline()] + old_prefix = root.children[insert_pos].prefix + root.children[insert_pos].prefix = u'' + root.insert_child(insert_pos, Node(syms.simple_stmt, children_import, prefix=old_prefix)) + if len(children_hooks) > 0: + root.insert_child(insert_pos + 1, Node(syms.simple_stmt, children_hooks)) ## The following functions are from python-modernize by Armin Ronacher: @@ -341,6 +432,7 @@ def check_future_import(node): """If this is a future import, return set of symbols that are imported, else return None.""" # node should be the import statement here + savenode = node if not (node.type == syms.simple_stmt and node.children): return set() node = node.children[0] @@ -350,9 +442,11 @@ def check_future_import(node): hasattr(node.children[1], 'value') and node.children[1].value == u'__future__'): return set() - node = node.children[3] + if node.children[3].type == token.LPAR: + node = node.children[4] + else: + node = node.children[3] # now node is the import_as_name[s] - # print(python_grammar.number2symbol[node.type]) # 
breaks sometimes if node.type == syms.import_as_names: result = set() for n in node.children: @@ -370,6 +464,55 @@ def check_future_import(node): elif node.type == token.NAME: return set([node.value]) else: - assert 0, "strange import" + # TODO: handle brackets like this: + # from __future__ import (absolute_import, division) + assert False, "strange import: %s" % savenode + + +SHEBANG_REGEX = r'^#!.*python' +ENCODING_REGEX = r"^#.*coding[:=]\s*([-\w.]+)" +def is_shebang_comment(node): + """ + Comments are prefixes for Leaf nodes. Returns whether the given node has a + prefix that looks like a shebang line or an encoding line: + + #!/usr/bin/env python + #!/usr/bin/python3 + """ + return bool(re.match(SHEBANG_REGEX, node.prefix)) + + +def is_encoding_comment(node): + """ + Comments are prefixes for Leaf nodes. Returns whether the given node has a + prefix that looks like an encoding line: + + # coding: utf-8 + # encoding: utf-8 + # -*- coding: -*- + # vim: set fileencoding= : + """ + return bool(re.match(ENCODING_REGEX, node.prefix)) + + +def wrap_in_fn_call(fn_name, args, prefix=None): + """ + Example: + >>> wrap_in_fn_call("oldstr", (arg,)) + oldstr(arg) + + >>> wrap_in_fn_call("olddiv", (arg1, arg2)) + olddiv(arg1, arg2) + + >>> wrap_in_fn_call("olddiv", [arg1, comma, arg2, comma, arg3]) + olddiv(arg1, arg2, arg3) + """ + assert len(args) > 0 + if len(args) == 2: + expr1, expr2 = args + newargs = [expr1, Comma(), expr2] + else: + newargs = args + return Call(Name(fn_name), newargs, prefix=prefix) diff --git a/libfuturize/fixes2/__init__.py b/src/libfuturize/fixes/__init__.py similarity index 50% rename from libfuturize/fixes2/__init__.py rename to src/libfuturize/fixes/__init__.py index c5c7a312..0b562501 100644 --- a/libfuturize/fixes2/__init__.py +++ b/src/libfuturize/fixes/__init__.py @@ -8,10 +8,9 @@ lib2to3_fix_names_stage1 = set([ 'lib2to3.fixes.fix_apply', 'lib2to3.fixes.fix_except', - 'lib2to3.fixes.fix_execfile', + 'lib2to3.fixes.fix_exec', 
'lib2to3.fixes.fix_exitfunc', 'lib2to3.fixes.fix_funcattrs', - 'lib2to3.fixes.fix_filter', 'lib2to3.fixes.fix_has_key', 'lib2to3.fixes.fix_idioms', # 'lib2to3.fixes.fix_import', # makes any implicit relative imports explicit. (Use with ``from __future__ import absolute_import) @@ -19,12 +18,15 @@ 'lib2to3.fixes.fix_isinstance', 'lib2to3.fixes.fix_methodattrs', 'lib2to3.fixes.fix_ne', + # 'lib2to3.fixes.fix_next', # would replace ``next`` method names + # with ``__next__``. 'lib2to3.fixes.fix_numliterals', # turns 1L into 1, 0755 into 0o755 'lib2to3.fixes.fix_paren', - # 'lib2to3.fixes.fix_print', + # 'lib2to3.fixes.fix_print', # see the libfuturize fixer that also + # adds ``from __future__ import print_function`` # 'lib2to3.fixes.fix_raise', # uses incompatible with_traceback() method on exceptions - 'lib2to3.fixes.fix_renames', - 'lib2to3.fixes.fix_reduce', + 'lib2to3.fixes.fix_reduce', # reduce is available in functools on Py2.6/Py2.7 + 'lib2to3.fixes.fix_renames', # sys.maxint -> sys.maxsize # 'lib2to3.fixes.fix_set_literal', # this is unnecessary and breaks Py2.6 support 'lib2to3.fixes.fix_repr', 'lib2to3.fixes.fix_standarderror', @@ -32,53 +34,64 @@ 'lib2to3.fixes.fix_throw', 'lib2to3.fixes.fix_tuple_params', 'lib2to3.fixes.fix_types', - 'lib2to3.fixes.fix_ws_comma', + 'lib2to3.fixes.fix_ws_comma', # can perhaps decrease readability: see issue #58 'lib2to3.fixes.fix_xreadlines', ]) # The following fixers add a dependency on the ``future`` package on order to # support Python 2: lib2to3_fix_names_stage2 = set([ - 'lib2to3.fixes.fix_basestring', # 'lib2to3.fixes.fix_buffer', # perhaps not safe. Test this. # 'lib2to3.fixes.fix_callable', # not needed in Py3.2+ - 'lib2to3.fixes.fix_dict', # TODO: add support for utils.viewitems() etc. - 'lib2to3.fixes.fix_exec', + 'lib2to3.fixes.fix_dict', # TODO: add support for utils.viewitems() etc. and move to stage2 + # 'lib2to3.fixes.fix_execfile', # some problems: see issue #37. 
+ # We use a custom fixer instead (see below) # 'lib2to3.fixes.fix_future', # we don't want to remove __future__ imports 'lib2to3.fixes.fix_getcwdu', - # 'lib2to3.fixes.fix_imports', # called by libfuturize.fixes2.fix_future_standard_library + # 'lib2to3.fixes.fix_imports', # called by libfuturize.fixes.fix_future_standard_library # 'lib2to3.fixes.fix_imports2', # we don't handle this yet (dbm) - 'lib2to3.fixes.fix_input', + # 'lib2to3.fixes.fix_input', # Called conditionally by libfuturize.fixes.fix_input 'lib2to3.fixes.fix_itertools', 'lib2to3.fixes.fix_itertools_imports', + 'lib2to3.fixes.fix_filter', 'lib2to3.fixes.fix_long', 'lib2to3.fixes.fix_map', # 'lib2to3.fixes.fix_metaclass', # causes SyntaxError in Py2! Use the one from ``six`` instead 'lib2to3.fixes.fix_next', - 'lib2to3.fixes.fix_nonzero', # TODO: add a decorator for mapping __bool__ to __nonzero__ + 'lib2to3.fixes.fix_nonzero', # TODO: cause this to import ``object`` and/or add a decorator for mapping __bool__ to __nonzero__ 'lib2to3.fixes.fix_operator', # we will need support for this by e.g. 
extending the Py2 operator module to provide those functions in Py3 'lib2to3.fixes.fix_raw_input', # 'lib2to3.fixes.fix_unicode', # strips off the u'' prefix, which removes a potentially helpful source of information for disambiguating unicode/byte strings - 'lib2to3.fixes.fix_urllib', - 'lib2to3.fixes.fix_xrange', + # 'lib2to3.fixes.fix_urllib', # included in libfuturize.fix_future_standard_library_urllib + # 'lib2to3.fixes.fix_xrange', # custom one because of a bug with Py3.3's lib2to3 'lib2to3.fixes.fix_zip', ]) -libfuturize_2fix_names_stage1 = set([ - 'libfuturize.fixes2.fix_absolute_import', - 'libfuturize.fixes2.fix_division', - 'libfuturize.fixes2.fix_print_with_import', - 'libfuturize.fixes2.fix_raise', - 'libfuturize.fixes2.fix_order___future__imports', # TODO: consolidate to a single line to simplify testing +libfuturize_fix_names_stage1 = set([ + 'libfuturize.fixes.fix_absolute_import', + 'libfuturize.fixes.fix_next_call', # obj.next() -> next(obj). Unlike + # lib2to3.fixes.fix_next, doesn't change + # the ``next`` method to ``__next__``. 
+ 'libfuturize.fixes.fix_print_with_import', + 'libfuturize.fixes.fix_raise', + # 'libfuturize.fixes.fix_order___future__imports', # TODO: consolidate to a single line to simplify testing ]) -libfuturize_2fix_names_stage2 = set([ - 'libfuturize.fixes2.fix_future_builtins', - 'libfuturize.fixes2.fix_future_standard_library', - 'libfuturize.fixes2.fix_metaclass', - # TODO: add int(33243) calls for what used to be 33243L - 'libfuturize.fixes2.fix_order___future__imports', # TODO: consolidate to a single line to simplify testing - 'libfuturize.fixes2.fix_unicode_keep_u', - 'libfuturize.fixes2.fix_unicode_literals_import', +libfuturize_fix_names_stage2 = set([ + 'libfuturize.fixes.fix_basestring', + # 'libfuturize.fixes.fix_add__future__imports_except_unicode_literals', # just in case + 'libfuturize.fixes.fix_cmp', + 'libfuturize.fixes.fix_division_safe', + 'libfuturize.fixes.fix_execfile', + 'libfuturize.fixes.fix_future_builtins', + 'libfuturize.fixes.fix_future_standard_library', + 'libfuturize.fixes.fix_future_standard_library_urllib', + 'libfuturize.fixes.fix_input', + 'libfuturize.fixes.fix_metaclass', + 'libpasteurize.fixes.fix_newstyle', + 'libfuturize.fixes.fix_object', + # 'libfuturize.fixes.fix_order___future__imports', # TODO: consolidate to a single line to simplify testing + 'libfuturize.fixes.fix_unicode_keep_u', + # 'libfuturize.fixes.fix_unicode_literals_import', + 'libfuturize.fixes.fix_xrange_with_import', # custom one because of a bug with Py3.3's lib2to3 ]) - diff --git a/src/libfuturize/fixes/fix_UserDict.py b/src/libfuturize/fixes/fix_UserDict.py new file mode 100644 index 00000000..cb0cfacc --- /dev/null +++ b/src/libfuturize/fixes/fix_UserDict.py @@ -0,0 +1,102 @@ +"""Fix UserDict. + +Incomplete! + +TODO: base this on fix_urllib perhaps? 
+""" + + +# Local imports +from lib2to3 import fixer_base +from lib2to3.fixer_util import Name, attr_chain +from lib2to3.fixes.fix_imports import alternates, build_pattern, FixImports + +MAPPING = {'UserDict': 'collections', +} + +# def alternates(members): +# return "(" + "|".join(map(repr, members)) + ")" +# +# +# def build_pattern(mapping=MAPPING): +# mod_list = ' | '.join(["module_name='%s'" % key for key in mapping]) +# bare_names = alternates(mapping.keys()) +# +# yield """name_import=import_name< 'import' ((%s) | +# multiple_imports=dotted_as_names< any* (%s) any* >) > +# """ % (mod_list, mod_list) +# yield """import_from< 'from' (%s) 'import' ['('] +# ( any | import_as_name< any 'as' any > | +# import_as_names< any* >) [')'] > +# """ % mod_list +# yield """import_name< 'import' (dotted_as_name< (%s) 'as' any > | +# multiple_imports=dotted_as_names< +# any* dotted_as_name< (%s) 'as' any > any* >) > +# """ % (mod_list, mod_list) +# +# # Find usages of module members in code e.g. thread.foo(bar) +# yield "power< bare_with_attr=(%s) trailer<'.' any > any* >" % bare_names + + +# class FixUserDict(fixer_base.BaseFix): +class FixUserdict(FixImports): + + BM_compatible = True + keep_line_order = True + # This is overridden in fix_imports2. + mapping = MAPPING + + # We want to run this fixer late, so fix_import doesn't try to make stdlib + # renames into relative imports. + run_order = 6 + + def build_pattern(self): + return "|".join(build_pattern(self.mapping)) + + def compile_pattern(self): + # We override this, so MAPPING can be pragmatically altered and the + # changes will be reflected in PATTERN. + self.PATTERN = self.build_pattern() + super(FixImports, self).compile_pattern() + + # Don't match the node if it's within another match. 
+ def match(self, node): + match = super(FixImports, self).match + results = match(node) + if results: + # Module usage could be in the trailer of an attribute lookup, so we + # might have nested matches when "bare_with_attr" is present. + if "bare_with_attr" not in results and \ + any(match(obj) for obj in attr_chain(node, "parent")): + return False + return results + return False + + def start_tree(self, tree, filename): + super(FixImports, self).start_tree(tree, filename) + self.replace = {} + + def transform(self, node, results): + import_mod = results.get("module_name") + if import_mod: + mod_name = import_mod.value + new_name = unicode(self.mapping[mod_name]) + import_mod.replace(Name(new_name, prefix=import_mod.prefix)) + if "name_import" in results: + # If it's not a "from x import x, y" or "import x as y" import, + # marked its usage to be replaced. + self.replace[mod_name] = new_name + if "multiple_imports" in results: + # This is a nasty hack to fix multiple imports on a line (e.g., + # "import StringIO, urlparse"). The problem is that I can't + # figure out an easy way to make a pattern recognize the keys of + # MAPPING randomly sprinkled in an import statement. + results = self.match(node) + if results: + self.transform(node, results) + else: + # Replace usage of the module. + bare_name = results["bare_with_attr"][0] + new_name = self.replace.get(bare_name.value) + if new_name: + bare_name.replace(Name(new_name, prefix=bare_name.prefix)) diff --git a/libfuturize/fixes2/fix_absolute_import.py b/src/libfuturize/fixes/fix_absolute_import.py similarity index 67% rename from libfuturize/fixes2/fix_absolute_import.py rename to src/libfuturize/fixes/fix_absolute_import.py index df68d66d..eab9c527 100644 --- a/libfuturize/fixes2/fix_absolute_import.py +++ b/src/libfuturize/fixes/fix_absolute_import.py @@ -1,24 +1,30 @@ """ Fixer for import statements, with a __future__ import line. 
-Based on lib2to3/fixes/fix_import.py +Based on lib2to3/fixes/fix_import.py, but extended slightly so it also +supports Cython modules. If spam is being imported from the local directory, this import: from spam import eggs becomes: + from __future__ import absolute_import from .spam import eggs and this: import spam becomes: + from __future__ import absolute_import from . import spam """ +from os.path import dirname, join, exists, sep from lib2to3.fixes.fix_import import FixImport from lib2to3.fixer_util import FromImport, syms from lib2to3.fixes.fix_import import traverse_imports + from libfuturize.fixer_util import future_import + class FixAbsoluteImport(FixImport): run_order = 9 @@ -64,9 +70,22 @@ def transform(self, node, results): future_import(u"absolute_import", node) return new - # This always adds "from __future__ import absolute_import": - # def transform(self, node, results): - # result = super(FixAbsoluteImport, self).transform(node, results) - # future_import(u"absolute_import", node) - # return result - + def probably_a_local_import(self, imp_name): + """ + Like the corresponding method in the base class, but this also + supports Cython modules. + """ + if imp_name.startswith(u"."): + # Relative imports are certainly not local imports. + return False + imp_name = imp_name.split(u".", 1)[0] + base_path = dirname(self.filename) + base_path = join(base_path, imp_name) + # If there is no __init__.py next to the file it's not in a package + # so can't be a relative import. 
+ if not exists(join(dirname(base_path), "__init__.py")): + return False + for ext in [".py", sep, ".pyc", ".so", ".sl", ".pyd", ".pyx"]: + if exists(base_path + ext): + return True + return False diff --git a/libfuturize/fixes2/fix_add__future__imports_except_unicode_literals.py b/src/libfuturize/fixes/fix_add__future__imports_except_unicode_literals.py similarity index 81% rename from libfuturize/fixes2/fix_add__future__imports_except_unicode_literals.py rename to src/libfuturize/fixes/fix_add__future__imports_except_unicode_literals.py index 643f9c11..37d7feec 100644 --- a/libfuturize/fixes2/fix_add__future__imports_except_unicode_literals.py +++ b/src/libfuturize/fixes/fix_add__future__imports_except_unicode_literals.py @@ -1,7 +1,9 @@ """ Fixer for adding: - from __future__ import (absolute_import, division, print_function) + from __future__ import absolute_import + from __future__ import division + from __future__ import print_function This is "stage 1": hopefully uncontroversial changes. 
@@ -19,7 +21,6 @@ class FixAddFutureImportsExceptUnicodeLiterals(fixer_base.BaseFix): def transform(self, node, results): # Reverse order: - future_import(u"print_function", node) - future_import(u"division", node) future_import(u"absolute_import", node) - + future_import(u"division", node) + future_import(u"print_function", node) diff --git a/src/libfuturize/fixes/fix_basestring.py b/src/libfuturize/fixes/fix_basestring.py new file mode 100644 index 00000000..5676d08f --- /dev/null +++ b/src/libfuturize/fixes/fix_basestring.py @@ -0,0 +1,17 @@ +""" +Fixer that adds ``from past.builtins import basestring`` if there is a +reference to ``basestring`` +""" + +from lib2to3 import fixer_base + +from libfuturize.fixer_util import touch_import_top + + +class FixBasestring(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = "'basestring'" + + def transform(self, node, results): + touch_import_top(u'past.builtins', 'basestring', node) diff --git a/libfuturize/fixes2/fix_bytes.py b/src/libfuturize/fixes/fix_bytes.py similarity index 100% rename from libfuturize/fixes2/fix_bytes.py rename to src/libfuturize/fixes/fix_bytes.py diff --git a/src/libfuturize/fixes/fix_cmp.py b/src/libfuturize/fixes/fix_cmp.py new file mode 100644 index 00000000..762eb4b4 --- /dev/null +++ b/src/libfuturize/fixes/fix_cmp.py @@ -0,0 +1,33 @@ +# coding: utf-8 +""" +Fixer for the cmp() function on Py2, which was removed in Py3. + +Adds this import line:: + + from past.builtins import cmp + +if cmp() is called in the code. 
+""" + +from __future__ import unicode_literals +from lib2to3 import fixer_base + +from libfuturize.fixer_util import touch_import_top + + +expression = "name='cmp'" + + +class FixCmp(fixer_base.BaseFix): + BM_compatible = True + run_order = 9 + + PATTERN = """ + power< + ({0}) trailer< '(' args=[any] ')' > + rest=any* > + """.format(expression) + + def transform(self, node, results): + name = results["name"] + touch_import_top(u'past.builtins', name.value, node) diff --git a/libfuturize/fixes2/fix_division.py b/src/libfuturize/fixes/fix_division.py similarity index 70% rename from libfuturize/fixes2/fix_division.py rename to src/libfuturize/fixes/fix_division.py index 54df2a2b..6975a52b 100644 --- a/libfuturize/fixes2/fix_division.py +++ b/src/libfuturize/fixes/fix_division.py @@ -1,4 +1,5 @@ """ +UNFINISHED For the ``future`` package. Adds this import line: @@ -8,5 +9,4 @@ at the top so the code runs identically on Py3 and Py2.6/2.7 """ -from ..fixes3.fix_division import FixDivision - +from libpasteurize.fixes.fix_division import FixDivision diff --git a/src/libfuturize/fixes/fix_division_safe.py b/src/libfuturize/fixes/fix_division_safe.py new file mode 100644 index 00000000..65c8c1da --- /dev/null +++ b/src/libfuturize/fixes/fix_division_safe.py @@ -0,0 +1,109 @@ +""" +For the ``future`` package. + +Adds this import line: + + from __future__ import division + +at the top and changes any old-style divisions to be calls to +past.utils.old_div so the code runs as before on Py2.6/2.7 and has the same +behaviour on Py3. + +If "from __future__ import division" is already in effect, this fixer does +nothing. +""" + +import re +from lib2to3.fixer_util import Leaf, Node, Comma +from lib2to3 import fixer_base +from libfuturize.fixer_util import (token, future_import, touch_import_top, + wrap_in_fn_call) + + +def match_division(node): + u""" + __future__.division redefines the meaning of a single slash for division, + so we match that and only that. 
+ """ + slash = token.SLASH + return node.type == slash and not node.next_sibling.type == slash and \ + not node.prev_sibling.type == slash + +const_re = re.compile('^[0-9]*[.][0-9]*$') + +def is_floaty(node): + return _is_floaty(node.prev_sibling) or _is_floaty(node.next_sibling) + + +def _is_floaty(expr): + if isinstance(expr, list): + expr = expr[0] + + if isinstance(expr, Leaf): + # If it's a leaf, let's see if it's a numeric constant containing a '.' + return const_re.match(expr.value) + elif isinstance(expr, Node): + # If the expression is a node, let's see if it's a direct cast to float + if isinstance(expr.children[0], Leaf): + return expr.children[0].value == u'float' + return False + + +class FixDivisionSafe(fixer_base.BaseFix): + # BM_compatible = True + run_order = 4 # this seems to be ignored? + + _accept_type = token.SLASH + + PATTERN = """ + term<(not('/') any)+ '/' ((not('/') any))> + """ + + def start_tree(self, tree, name): + """ + Skip this fixer if "__future__.division" is already imported. + """ + super(FixDivisionSafe, self).start_tree(tree, name) + self.skip = "division" in tree.future_features + + def match(self, node): + u""" + Since the tree needs to be fixed once and only once if and only if it + matches, we can start discarding matches after the first. 
+ """ + if node.type == self.syms.term: + matched = False + skip = False + children = [] + for child in node.children: + if skip: + skip = False + continue + if match_division(child) and not is_floaty(child): + matched = True + + # Strip any leading space for the first number: + children[0].prefix = u'' + + children = [wrap_in_fn_call("old_div", + children + [Comma(), child.next_sibling.clone()], + prefix=node.prefix)] + skip = True + else: + children.append(child.clone()) + if matched: + # In Python 2.6, `Node` does not have the fixers_applied attribute + # https://github.com/python/cpython/blob/8493c0cd66cfc181ac1517268a74f077e9998701/Lib/lib2to3/pytree.py#L235 + if hasattr(Node, "fixers_applied"): + return Node(node.type, children, fixers_applied=node.fixers_applied) + else: + return Node(node.type, children) + + return False + + def transform(self, node, results): + if self.skip: + return + future_import(u"division", node) + touch_import_top(u'past.utils', u'old_div', node) + return results diff --git a/src/libfuturize/fixes/fix_execfile.py b/src/libfuturize/fixes/fix_execfile.py new file mode 100644 index 00000000..cfe9d8d0 --- /dev/null +++ b/src/libfuturize/fixes/fix_execfile.py @@ -0,0 +1,37 @@ +# coding: utf-8 +""" +Fixer for the execfile() function on Py2, which was removed in Py3. + +The Lib/lib2to3/fixes/fix_execfile.py module has some problems: see +python-future issue #37. This fixer merely imports execfile() from +past.builtins and leaves the code alone. + +Adds this import line:: + + from past.builtins import execfile + +for the function execfile() that was removed from Py3. 
+""" + +from __future__ import unicode_literals +from lib2to3 import fixer_base + +from libfuturize.fixer_util import touch_import_top + + +expression = "name='execfile'" + + +class FixExecfile(fixer_base.BaseFix): + BM_compatible = True + run_order = 9 + + PATTERN = """ + power< + ({0}) trailer< '(' args=[any] ')' > + rest=any* > + """.format(expression) + + def transform(self, node, results): + name = results["name"] + touch_import_top(u'past.builtins', name.value, node) diff --git a/src/libfuturize/fixes/fix_future_builtins.py b/src/libfuturize/fixes/fix_future_builtins.py new file mode 100644 index 00000000..eea6c6a1 --- /dev/null +++ b/src/libfuturize/fixes/fix_future_builtins.py @@ -0,0 +1,59 @@ +""" +For the ``future`` package. + +Adds this import line:: + + from builtins import XYZ + +for each of the functions XYZ that is used in the module. + +Adds these imports after any other imports (in an initial block of them). +""" + +from __future__ import unicode_literals + +from lib2to3 import fixer_base +from lib2to3.pygram import python_symbols as syms +from lib2to3.fixer_util import Name, Call, in_special_context + +from libfuturize.fixer_util import touch_import_top + +# All builtins are: +# from future.builtins.iterators import (filter, map, zip) +# from future.builtins.misc import (ascii, chr, hex, input, isinstance, oct, open, round, super) +# from future.types import (bytes, dict, int, range, str) +# We don't need isinstance any more. + +replaced_builtin_fns = '''filter map zip + ascii chr hex input next oct + bytes range str raw_input'''.split() + # This includes raw_input as a workaround for the + # lib2to3 fixer for raw_input on Py3 (only), allowing + # the correct import to be included. (Py3 seems to run + # the fixers the wrong way around, perhaps ignoring the + # run_order class attribute below ...) 
+ +expression = '|'.join(["name='{0}'".format(name) for name in replaced_builtin_fns]) + + +class FixFutureBuiltins(fixer_base.BaseFix): + BM_compatible = True + run_order = 7 + + # Currently we only match uses as a function. This doesn't match e.g.: + # if isinstance(s, str): + # ... + PATTERN = """ + power< + ({0}) trailer< '(' [arglist=any] ')' > + rest=any* > + | + power< + 'map' trailer< '(' [arglist=any] ')' > + > + """.format(expression) + + def transform(self, node, results): + name = results["name"] + touch_import_top(u'builtins', name.value, node) + # name.replace(Name(u"input", prefix=name.prefix)) diff --git a/libfuturize/fixes2/fix_future_standard_library.py b/src/libfuturize/fixes/fix_future_standard_library.py similarity index 90% rename from libfuturize/fixes2/fix_future_standard_library.py rename to src/libfuturize/fixes/fix_future_standard_library.py index 828e5622..a1c3f3d4 100644 --- a/libfuturize/fixes2/fix_future_standard_library.py +++ b/src/libfuturize/fixes/fix_future_standard_library.py @@ -2,9 +2,10 @@ For the ``future`` package. Changes any imports needed to reflect the standard library reorganization. Also -Also adds this import line: +Also adds these import lines: from future import standard_library + standard_library.install_aliases() after any __future__ imports but before any other imports. """ @@ -21,4 +22,3 @@ def transform(self, node, results): # TODO: add a blank line between any __future__ imports and this? touch_import_top(u'future', u'standard_library', node) return result - diff --git a/src/libfuturize/fixes/fix_future_standard_library_urllib.py b/src/libfuturize/fixes/fix_future_standard_library_urllib.py new file mode 100644 index 00000000..cf673884 --- /dev/null +++ b/src/libfuturize/fixes/fix_future_standard_library_urllib.py @@ -0,0 +1,28 @@ +""" +For the ``future`` package. 
+ +A special fixer that ensures that these lines have been added:: + + from future import standard_library + standard_library.install_aliases() + +even if the only module imported was ``urllib``, in which case the regular fixer +wouldn't have added these lines. + +""" + +from lib2to3.fixes.fix_urllib import FixUrllib +from libfuturize.fixer_util import touch_import_top, find_root + + +class FixFutureStandardLibraryUrllib(FixUrllib): # not a subclass of FixImports + run_order = 8 + + def transform(self, node, results): + # transform_member() in lib2to3/fixes/fix_urllib.py breaks node so find_root(node) + # no longer works after the super() call below. So we find the root first: + root = find_root(node) + result = super(FixFutureStandardLibraryUrllib, self).transform(node, results) + # TODO: add a blank line between any __future__ imports and this? + touch_import_top(u'future', u'standard_library', root) + return result diff --git a/src/libfuturize/fixes/fix_input.py b/src/libfuturize/fixes/fix_input.py new file mode 100644 index 00000000..8a43882e --- /dev/null +++ b/src/libfuturize/fixes/fix_input.py @@ -0,0 +1,32 @@ +""" +Fixer for input. + +Does a check for `from builtins import input` before running the lib2to3 fixer. +The fixer will not run when the import is already present. 
+ + +this: + a = input() +becomes: + from builtins import input + a = eval(input()) + +and this: + from builtins import input + a = input() +becomes (no change): + from builtins import input + a = input() +""" + +import lib2to3.fixes.fix_input +from lib2to3.fixer_util import does_tree_import + + +class FixInput(lib2to3.fixes.fix_input.FixInput): + def transform(self, node, results): + + if does_tree_import('builtins', 'input', node): + return + + return super(FixInput, self).transform(node, results) diff --git a/libfuturize/fixes2/fix_metaclass.py b/src/libfuturize/fixes/fix_metaclass.py similarity index 96% rename from libfuturize/fixes2/fix_metaclass.py rename to src/libfuturize/fixes/fix_metaclass.py index a917341d..a7eee40d 100644 --- a/libfuturize/fixes2/fix_metaclass.py +++ b/src/libfuturize/fixes/fix_metaclass.py @@ -37,7 +37,7 @@ def has_metaclass(parent): """ we have to check the cls_node without changing it. - There are two possiblities: + There are two possibilities: 1) clsdef => suite => simple_stmt => expr_stmt => Leaf('__meta') 2) clsdef => simple_stmt => expr_stmt => Leaf('__meta') """ @@ -63,7 +63,7 @@ def fixup_parse_tree(cls_node): # already in the preferred format, do nothing return - # !%@#! oneliners have no suite node, we have to fake one up + # !%@#! 
one-liners have no suite node, we have to fake one up for i, node in enumerate(cls_node.children): if node.type == token.COLON: break @@ -201,6 +201,11 @@ def transform(self, node, results): else: raise ValueError("Unexpected class definition") + # now stick the metaclass in the arglist + meta_txt = last_metaclass.children[0].children[0] + meta_txt.value = 'metaclass' + orig_meta_prefix = meta_txt.prefix + # Was: touch_import(None, u'future.utils', node) touch_import(u'future.utils', u'with_metaclass', node) diff --git a/src/libfuturize/fixes/fix_next_call.py b/src/libfuturize/fixes/fix_next_call.py new file mode 100644 index 00000000..282f1852 --- /dev/null +++ b/src/libfuturize/fixes/fix_next_call.py @@ -0,0 +1,104 @@ +""" +Based on fix_next.py by Collin Winter. + +Replaces it.next() -> next(it), per PEP 3114. + +Unlike fix_next.py, this fixer doesn't replace the name of a next method with __next__, +which would break Python 2 compatibility without further help from fixers in +stage 2. +""" + +# Local imports +from lib2to3.pgen2 import token +from lib2to3.pygram import python_symbols as syms +from lib2to3 import fixer_base +from lib2to3.fixer_util import Name, Call, find_binding + +bind_warning = "Calls to builtin next() possibly shadowed by global binding" + + +class FixNextCall(fixer_base.BaseFix): + BM_compatible = True + PATTERN = """ + power< base=any+ trailer< '.' attr='next' > trailer< '(' ')' > > + | + power< head=any+ trailer< '.' 
attr='next' > not trailer< '(' ')' > > + | + global=global_stmt< 'global' any* 'next' any* > + """ + + order = "pre" # Pre-order tree traversal + + def start_tree(self, tree, filename): + super(FixNextCall, self).start_tree(tree, filename) + + n = find_binding('next', tree) + if n: + self.warning(n, bind_warning) + self.shadowed_next = True + else: + self.shadowed_next = False + + def transform(self, node, results): + assert results + + base = results.get("base") + attr = results.get("attr") + name = results.get("name") + + if base: + if self.shadowed_next: + # Omit this: + # attr.replace(Name("__next__", prefix=attr.prefix)) + pass + else: + base = [n.clone() for n in base] + base[0].prefix = "" + node.replace(Call(Name("next", prefix=node.prefix), base)) + elif name: + # Omit this: + # n = Name("__next__", prefix=name.prefix) + # name.replace(n) + pass + elif attr: + # We don't do this transformation if we're assigning to "x.next". + # Unfortunately, it doesn't seem possible to do this in PATTERN, + # so it's being done here. + if is_assign_target(node): + head = results["head"] + if "".join([str(n) for n in head]).strip() == '__builtin__': + self.warning(node, bind_warning) + return + # Omit this: + # attr.replace(Name("__next__")) + elif "global" in results: + self.warning(node, bind_warning) + self.shadowed_next = True + + +### The following functions help test if node is part of an assignment +### target. 
+ +def is_assign_target(node): + assign = find_assign(node) + if assign is None: + return False + + for child in assign.children: + if child.type == token.EQUAL: + return False + elif is_subtree(child, node): + return True + return False + +def find_assign(node): + if node.type == syms.expr_stmt: + return node + if node.type == syms.simple_stmt or node.parent is None: + return None + return find_assign(node.parent) + +def is_subtree(root, node): + if root == node: + return True + return any(is_subtree(c, node) for c in root.children) diff --git a/src/libfuturize/fixes/fix_object.py b/src/libfuturize/fixes/fix_object.py new file mode 100644 index 00000000..accf2c52 --- /dev/null +++ b/src/libfuturize/fixes/fix_object.py @@ -0,0 +1,17 @@ +""" +Fixer that adds ``from builtins import object`` if there is a line +like this: + class Foo(object): +""" + +from lib2to3 import fixer_base + +from libfuturize.fixer_util import touch_import_top + + +class FixObject(fixer_base.BaseFix): + + PATTERN = u"classdef< 'class' NAME '(' name='object' ')' colon=':' any >" + + def transform(self, node, results): + touch_import_top(u'builtins', 'object', node) diff --git a/src/libfuturize/fixes/fix_oldstr_wrap.py b/src/libfuturize/fixes/fix_oldstr_wrap.py new file mode 100644 index 00000000..ad58771d --- /dev/null +++ b/src/libfuturize/fixes/fix_oldstr_wrap.py @@ -0,0 +1,39 @@ +""" +For the ``future`` package. + +Adds this import line: + + from past.builtins import str as oldstr + +at the top and wraps any unadorned string literals 'abc' or explicit byte-string +literals b'abc' in oldstr() calls so the code has the same behaviour on Py3 as +on Py2.6/2.7. 
+""" + +from __future__ import unicode_literals +import re +from lib2to3 import fixer_base +from lib2to3.pgen2 import token +from lib2to3.fixer_util import syms +from libfuturize.fixer_util import (future_import, touch_import_top, + wrap_in_fn_call) + + +_literal_re = re.compile(r"[^uUrR]?[\'\"]") + + +class FixOldstrWrap(fixer_base.BaseFix): + BM_compatible = True + PATTERN = "STRING" + + def transform(self, node, results): + if node.type == token.STRING: + touch_import_top(u'past.types', u'oldstr', node) + if _literal_re.match(node.value): + new = node.clone() + # Strip any leading space or comments: + # TODO: check: do we really want to do this? + new.prefix = u'' + new.value = u'b' + new.value + wrapped = wrap_in_fn_call("oldstr", [new], prefix=node.prefix) + return wrapped diff --git a/libfuturize/fixes2/fix_order___future__imports.py b/src/libfuturize/fixes/fix_order___future__imports.py similarity index 93% rename from libfuturize/fixes2/fix_order___future__imports.py rename to src/libfuturize/fixes/fix_order___future__imports.py index 8c32c28c..00d7ef60 100644 --- a/libfuturize/fixes2/fix_order___future__imports.py +++ b/src/libfuturize/fixes/fix_order___future__imports.py @@ -1,4 +1,6 @@ """ +UNFINISHED + Fixer for turning multiple lines like these: from __future__ import division @@ -26,12 +28,9 @@ class FixOrderFutureImports(fixer_base.BaseFix): # Match only once per file # """ # if hasattr(node, 'type') and node.type == syms.file_input: - # import pdb - # pdb.set_trace() # return True # return False def transform(self, node, results): # TODO # write me pass - diff --git a/src/libfuturize/fixes/fix_print.py b/src/libfuturize/fixes/fix_print.py new file mode 100644 index 00000000..2554717c --- /dev/null +++ b/src/libfuturize/fixes/fix_print.py @@ -0,0 +1,104 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer for print. + +Change: + "print" into "print()" + "print ..." 
into "print(...)" + "print(...)" not changed + "print ... ," into "print(..., end=' ')" + "print >>x, ..." into "print(..., file=x)" + +No changes are applied if print_function is imported from __future__ + +""" + +# Local imports +from lib2to3 import patcomp, pytree, fixer_base +from lib2to3.pgen2 import token +from lib2to3.fixer_util import Name, Call, Comma, String +# from libmodernize import add_future + +parend_expr = patcomp.compile_pattern( + """atom< '(' [arith_expr|atom|power|term|STRING|NAME] ')' >""" + ) + + +class FixPrint(fixer_base.BaseFix): + + BM_compatible = True + + PATTERN = """ + simple_stmt< any* bare='print' any* > | print_stmt + """ + + def transform(self, node, results): + assert results + + bare_print = results.get("bare") + + if bare_print: + # Special-case print all by itself. + bare_print.replace(Call(Name(u"print"), [], + prefix=bare_print.prefix)) + # The "from __future__ import print_function"" declaration is added + # by the fix_print_with_import fixer, so we skip it here. + # add_future(node, u'print_function') + return + assert node.children[0] == Name(u"print") + args = node.children[1:] + if len(args) == 1 and parend_expr.match(args[0]): + # We don't want to keep sticking parens around an + # already-parenthesised expression. 
+ return + + sep = end = file = None + if args and args[-1] == Comma(): + args = args[:-1] + end = " " + + # try to determine if the string ends in a non-space whitespace character, in which + # case there should be no space at the end of the conversion + string_leaves = [leaf for leaf in args[-1].leaves() if leaf.type == token.STRING] + if ( + string_leaves + and string_leaves[-1].value[0] != "r" # "raw" string + and string_leaves[-1].value[-3:-1] in (r"\t", r"\n", r"\r") + ): + end = "" + if args and args[0] == pytree.Leaf(token.RIGHTSHIFT, u">>"): + assert len(args) >= 2 + file = args[1].clone() + args = args[3:] # Strip a possible comma after the file expression + # Now synthesize a print(args, sep=..., end=..., file=...) node. + l_args = [arg.clone() for arg in args] + if l_args: + l_args[0].prefix = u"" + if sep is not None or end is not None or file is not None: + if sep is not None: + self.add_kwarg(l_args, u"sep", String(repr(sep))) + if end is not None: + self.add_kwarg(l_args, u"end", String(repr(end))) + if file is not None: + self.add_kwarg(l_args, u"file", file) + n_stmt = Call(Name(u"print"), l_args) + n_stmt.prefix = node.prefix + + # Note that there are corner cases where adding this future-import is + # incorrect, for example when the file also has a 'print ()' statement + # that was intended to print "()". 
+ # add_future(node, u'print_function') + return n_stmt + + def add_kwarg(self, l_nodes, s_kwd, n_expr): + # XXX All this prefix-setting may lose comments (though rarely) + n_expr.prefix = u"" + n_argument = pytree.Node(self.syms.argument, + (Name(s_kwd), + pytree.Leaf(token.EQUAL, u"="), + n_expr)) + if l_nodes: + l_nodes.append(Comma()) + n_argument.prefix = u" " + l_nodes.append(n_argument) diff --git a/libfuturize/fixes2/fix_print_with_import.py b/src/libfuturize/fixes/fix_print_with_import.py similarity index 66% rename from libfuturize/fixes2/fix_print_with_import.py rename to src/libfuturize/fixes/fix_print_with_import.py index 7f526b4b..34490461 100644 --- a/libfuturize/fixes2/fix_print_with_import.py +++ b/src/libfuturize/fixes/fix_print_with_import.py @@ -8,13 +8,15 @@ at the top to retain compatibility with Python 2.6+. """ -from lib2to3.fixes.fix_print import FixPrint +from libfuturize.fixes.fix_print import FixPrint from libfuturize.fixer_util import future_import class FixPrintWithImport(FixPrint): run_order = 7 def transform(self, node, results): - n_stmt = super(FixPrintWithImport, self).transform(node, results) + # Add the __future__ import first. (Otherwise any shebang or encoding + # comment line attached as a prefix to the print statement will be + # copied twice and appear twice.) future_import(u'print_function', node) + n_stmt = super(FixPrintWithImport, self).transform(node, results) return n_stmt - diff --git a/src/libfuturize/fixes/fix_raise.py b/src/libfuturize/fixes/fix_raise.py new file mode 100644 index 00000000..d113401c --- /dev/null +++ b/src/libfuturize/fixes/fix_raise.py @@ -0,0 +1,107 @@ +"""Fixer for 'raise E, V' + +From Armin Ronacher's ``python-modernize``. 
+ +raise -> raise +raise E -> raise E +raise E, 5 -> raise E(5) +raise E, 5, T -> raise E(5).with_traceback(T) +raise E, None, T -> raise E.with_traceback(T) + +raise (((E, E'), E''), E'''), 5 -> raise E(5) +raise "foo", V, T -> warns about string exceptions + +raise E, (V1, V2) -> raise E(V1, V2) +raise E, (V1, V2), T -> raise E(V1, V2).with_traceback(T) + + +CAVEATS: +1) "raise E, V, T" cannot be translated safely in general. If V + is not a tuple or a (number, string, None) literal, then: + + raise E, V, T -> from future.utils import raise_ + raise_(E, V, T) +""" +# Author: Collin Winter, Armin Ronacher, Mark Huang + +# Local imports +from lib2to3 import pytree, fixer_base +from lib2to3.pgen2 import token +from lib2to3.fixer_util import Name, Call, is_tuple, Comma, Attr, ArgList + +from libfuturize.fixer_util import touch_import_top + + +class FixRaise(fixer_base.BaseFix): + + BM_compatible = True + PATTERN = """ + raise_stmt< 'raise' exc=any [',' val=any [',' tb=any]] > + """ + + def transform(self, node, results): + syms = self.syms + + exc = results["exc"].clone() + if exc.type == token.STRING: + msg = "Python 3 does not support string exceptions" + self.cannot_convert(node, msg) + return + + # Python 2 supports + # raise ((((E1, E2), E3), E4), E5), V + # as a synonym for + # raise E1, V + # Since Python 3 will not support this, we recurse down any tuple + # literals, always taking the first element. + if is_tuple(exc): + while is_tuple(exc): + # exc.children[1:-1] is the unparenthesized tuple + # exc.children[1].children[0] is the first element of the tuple + exc = exc.children[1].children[0].clone() + exc.prefix = u" " + + if "tb" in results: + tb = results["tb"].clone() + else: + tb = None + + if "val" in results: + val = results["val"].clone() + if is_tuple(val): + # Assume that exc is a subclass of Exception and call exc(*val). 
+ args = [c.clone() for c in val.children[1:-1]] + exc = Call(exc, args) + elif val.type in (token.NUMBER, token.STRING): + # Handle numeric and string literals specially, e.g. + # "raise Exception, 5" -> "raise Exception(5)". + val.prefix = u"" + exc = Call(exc, [val]) + elif val.type == token.NAME and val.value == u"None": + # Handle None specially, e.g. + # "raise Exception, None" -> "raise Exception". + pass + else: + # val is some other expression. If val evaluates to an instance + # of exc, it should just be raised. If val evaluates to None, + # a default instance of exc should be raised (as above). If val + # evaluates to a tuple, exc(*val) should be called (as + # above). Otherwise, exc(val) should be called. We can only + # tell what to do at runtime, so defer to future.utils.raise_(), + # which handles all of these cases. + touch_import_top(u"future.utils", u"raise_", node) + exc.prefix = u"" + args = [exc, Comma(), val] + if tb is not None: + args += [Comma(), tb] + return Call(Name(u"raise_"), args, prefix=node.prefix) + + if tb is not None: + tb.prefix = "" + exc_list = Attr(exc, Name('with_traceback')) + [ArgList([tb])] + else: + exc_list = [exc] + + return pytree.Node(syms.raise_stmt, + [Name(u"raise")] + exc_list, + prefix=node.prefix) diff --git a/src/libfuturize/fixes/fix_remove_old__future__imports.py b/src/libfuturize/fixes/fix_remove_old__future__imports.py new file mode 100644 index 00000000..9336f75f --- /dev/null +++ b/src/libfuturize/fixes/fix_remove_old__future__imports.py @@ -0,0 +1,26 @@ +""" +Fixer for removing any of these lines: + + from __future__ import with_statement + from __future__ import nested_scopes + from __future__ import generators + +The reason is that __future__ imports like these are required to be the first +line of code (after docstrings) on Python 2.6+, which can get in the way. + +These imports are always enabled in Python 2.6+, which is the minimum sane +version to target for Py2/3 compatibility. 
+""" + +from lib2to3 import fixer_base +from libfuturize.fixer_util import remove_future_import + +class FixRemoveOldFutureImports(fixer_base.BaseFix): + BM_compatible = True + PATTERN = "file_input" + run_order = 1 + + def transform(self, node, results): + remove_future_import(u"with_statement", node) + remove_future_import(u"nested_scopes", node) + remove_future_import(u"generators", node) diff --git a/libfuturize/fixes2/fix_unicode_keep_u.py b/src/libfuturize/fixes/fix_unicode_keep_u.py similarity index 89% rename from libfuturize/fixes2/fix_unicode_keep_u.py rename to src/libfuturize/fixes/fix_unicode_keep_u.py index 81d4a078..2e9a4e47 100644 --- a/libfuturize/fixes2/fix_unicode_keep_u.py +++ b/src/libfuturize/fixes/fix_unicode_keep_u.py @@ -1,14 +1,13 @@ """Fixer that changes unicode to str and unichr to chr, but -- unlike the lib2to3 fix_unicode.py fixer, does not change u"..." into "...". -The reason is that Py3.3+ support the u"..." string prefix, and, if +The reason is that Py3.3+ supports the u"..." string prefix, and, if present, the prefix may provide useful information for disambiguating between byte strings and unicode strings, which is often the hardest part of the porting task. 
""" -import re from lib2to3.pgen2 import token from lib2to3 import fixer_base @@ -23,4 +22,3 @@ def transform(self, node, results): new = node.clone() new.value = _mapping[node.value] return new - diff --git a/libfuturize/fixes2/fix_unicode_literals_import.py b/src/libfuturize/fixes/fix_unicode_literals_import.py similarity index 98% rename from libfuturize/fixes2/fix_unicode_literals_import.py rename to src/libfuturize/fixes/fix_unicode_literals_import.py index 9f21d7c6..51c50620 100644 --- a/libfuturize/fixes2/fix_unicode_literals_import.py +++ b/src/libfuturize/fixes/fix_unicode_literals_import.py @@ -1,6 +1,6 @@ """ Adds this import: - + from __future__ import unicode_literals """ @@ -16,4 +16,3 @@ class FixUnicodeLiteralsImport(fixer_base.BaseFix): def transform(self, node, results): future_import(u"unicode_literals", node) - diff --git a/src/libfuturize/fixes/fix_xrange_with_import.py b/src/libfuturize/fixes/fix_xrange_with_import.py new file mode 100644 index 00000000..c910f816 --- /dev/null +++ b/src/libfuturize/fixes/fix_xrange_with_import.py @@ -0,0 +1,20 @@ +""" +For the ``future`` package. + +Turns any xrange calls into range calls and adds this import line: + + from builtins import range + +at the top. 
+""" + +from lib2to3.fixes.fix_xrange import FixXrange + +from libfuturize.fixer_util import touch_import_top + + +class FixXrangeWithImport(FixXrange): + def transform(self, node, results): + result = super(FixXrangeWithImport, self).transform(node, results) + touch_import_top('builtins', 'range', node) + return result diff --git a/src/libfuturize/main.py b/src/libfuturize/main.py new file mode 100644 index 00000000..634c2f25 --- /dev/null +++ b/src/libfuturize/main.py @@ -0,0 +1,322 @@ +""" +futurize: automatic conversion to clean 2/3 code using ``python-future`` +====================================================================== + +Like Armin Ronacher's modernize.py, ``futurize`` attempts to produce clean +standard Python 3 code that runs on both Py2 and Py3. + +One pass +-------- + +Use it like this on Python 2 code: + + $ futurize --verbose mypython2script.py + +This will attempt to port the code to standard Py3 code that also +provides Py2 compatibility with the help of the right imports from +``future``. + +To write changes to the files, use the -w flag. + +Two stages +---------- + +The ``futurize`` script can also be called in two separate stages. First: + + $ futurize --stage1 mypython2script.py + +This produces more modern Python 2 code that is not yet compatible with Python +3. The tests should still run and the diff should be uncontroversial to apply to +most Python projects that are willing to drop support for Python 2.5 and lower. + +After this, the recommended approach is to explicitly mark all strings that must +be byte-strings with a b'' prefix and all text (unicode) strings with a u'' +prefix, and then invoke the second stage of Python 2 to 2/3 conversion with:: + + $ futurize --stage2 mypython2script.py + +Stage 2 adds a dependency on ``future``. It converts most remaining Python +2-specific code to Python 3 code and adds appropriate imports from ``future`` +to restore Py2 support. 
+ +The command above leaves all unadorned string literals as native strings +(byte-strings on Py2, unicode strings on Py3). If instead you would like all +unadorned string literals to be promoted to unicode, you can also pass this +flag: + + $ futurize --stage2 --unicode-literals mypython2script.py + +This adds the declaration ``from __future__ import unicode_literals`` to the +top of each file, which implicitly declares all unadorned string literals to be +unicode strings (``unicode`` on Py2). + +All imports +----------- + +The --all-imports option forces adding all ``__future__`` imports, +``builtins`` imports, and standard library aliases, even if they don't +seem necessary for the current state of each module. (This can simplify +testing, and can reduce the need to think about Py2 compatibility when editing +the code further.) + +""" + +from __future__ import (absolute_import, print_function, unicode_literals) +import future.utils +from future import __version__ + +import sys +import logging +import optparse +import os + +from lib2to3.main import warn, StdoutRefactoringTool +from lib2to3 import refactor + +from libfuturize.fixes import (lib2to3_fix_names_stage1, + lib2to3_fix_names_stage2, + libfuturize_fix_names_stage1, + libfuturize_fix_names_stage2) + +fixer_pkg = 'libfuturize.fixes' + + +def main(args=None): + """Main program. + + Args: + fixer_pkg: the name of a package where the fixers are located. + args: optional; a list of command line arguments. If omitted, + sys.argv[1:] is used. + + Returns a suggested exit status (0, 1, 2). 
+ """ + + # Set up option parser + parser = optparse.OptionParser(usage="futurize [options] file|dir ...") + parser.add_option("-V", "--version", action="store_true", + help="Report the version number of futurize") + parser.add_option("-a", "--all-imports", action="store_true", + help="Add all __future__ and future imports to each module") + parser.add_option("-1", "--stage1", action="store_true", + help="Modernize Python 2 code only; no compatibility with Python 3 (or dependency on ``future``)") + parser.add_option("-2", "--stage2", action="store_true", + help="Take modernized (stage1) code and add a dependency on ``future`` to provide Py3 compatibility.") + parser.add_option("-0", "--both-stages", action="store_true", + help="Apply both stages 1 and 2") + parser.add_option("-u", "--unicode-literals", action="store_true", + help="Add ``from __future__ import unicode_literals`` to implicitly convert all unadorned string literals '' into unicode strings") + parser.add_option("-f", "--fix", action="append", default=[], + help="Each FIX specifies a transformation; default: all.\nEither use '-f division -f metaclass' etc. 
or use the fully-qualified module name: '-f lib2to3.fixes.fix_types -f libfuturize.fixes.fix_unicode_keep_u'") + parser.add_option("-j", "--processes", action="store", default=1, + type="int", help="Run 2to3 concurrently") + parser.add_option("-x", "--nofix", action="append", default=[], + help="Prevent a fixer from being run.") + parser.add_option("-l", "--list-fixes", action="store_true", + help="List available transformations") + parser.add_option("-p", "--print-function", action="store_true", + help="Modify the grammar so that print() is a function") + parser.add_option("-v", "--verbose", action="store_true", + help="More verbose logging") + parser.add_option("--no-diffs", action="store_true", + help="Don't show diffs of the refactoring") + parser.add_option("-w", "--write", action="store_true", + help="Write back modified files") + parser.add_option("-n", "--nobackups", action="store_true", default=False, + help="Don't write backups for modified files.") + parser.add_option("-o", "--output-dir", action="store", type="str", + default="", help="Put output files in this directory " + "instead of overwriting the input files. Requires -n. " + "For Python >= 2.7 only.") + parser.add_option("-W", "--write-unchanged-files", action="store_true", + help="Also write files even if no changes were required" + " (useful with --output-dir); implies -w.") + parser.add_option("--add-suffix", action="store", type="str", default="", + help="Append this string to all output filenames." + " Requires -n if non-empty. For Python >= 2.7 only." + "ex: --add-suffix='3' will generate .py3 files.") + + # Parse command line arguments + flags = {} + refactor_stdin = False + options, args = parser.parse_args(args) + + if options.write_unchanged_files: + flags["write_unchanged_files"] = True + if not options.write: + warn("--write-unchanged-files/-W implies -w.") + options.write = True + # If we allowed these, the original files would be renamed to backup names + # but not replaced. 
+ if options.output_dir and not options.nobackups: + parser.error("Can't use --output-dir/-o without -n.") + if options.add_suffix and not options.nobackups: + parser.error("Can't use --add-suffix without -n.") + + if not options.write and options.no_diffs: + warn("not writing files and not printing diffs; that's not very useful") + if not options.write and options.nobackups: + parser.error("Can't use -n without -w") + if "-" in args: + refactor_stdin = True + if options.write: + print("Can't write to stdin.", file=sys.stderr) + return 2 + # Is this ever necessary? + if options.print_function: + flags["print_function"] = True + + # Set up logging handler + level = logging.DEBUG if options.verbose else logging.INFO + logging.basicConfig(format='%(name)s: %(message)s', level=level) + logger = logging.getLogger('libfuturize.main') + + if options.stage1 or options.stage2: + assert options.both_stages is None + options.both_stages = False + else: + options.both_stages = True + + avail_fixes = set() + + if options.stage1 or options.both_stages: + avail_fixes.update(lib2to3_fix_names_stage1) + avail_fixes.update(libfuturize_fix_names_stage1) + if options.stage2 or options.both_stages: + avail_fixes.update(lib2to3_fix_names_stage2) + avail_fixes.update(libfuturize_fix_names_stage2) + + if options.unicode_literals: + avail_fixes.add('libfuturize.fixes.fix_unicode_literals_import') + + if options.version: + print(__version__) + return 0 + if options.list_fixes: + print("Available transformations for the -f/--fix option:") + # for fixname in sorted(refactor.get_all_fix_names(fixer_pkg)): + for fixname in sorted(avail_fixes): + print(fixname) + if not args: + return 0 + if not args: + print("At least one file or directory argument required.", + file=sys.stderr) + print("Use --help to show usage.", file=sys.stderr) + return 2 + + unwanted_fixes = set() + for fix in options.nofix: + if ".fix_" in fix: + unwanted_fixes.add(fix) + else: + # Infer the full module name for the 
fixer. + # First ensure that no names clash (e.g. + # lib2to3.fixes.fix_blah and libfuturize.fixes.fix_blah): + found = [f for f in avail_fixes + if f.endswith('fix_{0}'.format(fix))] + if len(found) > 1: + print("Ambiguous fixer name. Choose a fully qualified " + "module name instead from these:\n" + + "\n".join(" " + myf for myf in found), + file=sys.stderr) + return 2 + elif len(found) == 0: + print("Unknown fixer. Use --list-fixes or -l for a list.", + file=sys.stderr) + return 2 + unwanted_fixes.add(found[0]) + + extra_fixes = set() + if options.all_imports: + if options.stage1: + prefix = 'libfuturize.fixes.' + extra_fixes.add(prefix + + 'fix_add__future__imports_except_unicode_literals') + else: + # In case the user hasn't run stage1 for some reason: + prefix = 'libpasteurize.fixes.' + extra_fixes.add(prefix + 'fix_add_all__future__imports') + extra_fixes.add(prefix + 'fix_add_future_standard_library_import') + extra_fixes.add(prefix + 'fix_add_all_future_builtins') + explicit = set() + if options.fix: + all_present = False + for fix in options.fix: + if fix == 'all': + all_present = True + else: + if ".fix_" in fix: + explicit.add(fix) + else: + # Infer the full module name for the fixer. + # First ensure that no names clash (e.g. + # lib2to3.fixes.fix_blah and libfuturize.fixes.fix_blah): + found = [f for f in avail_fixes + if f.endswith('fix_{0}'.format(fix))] + if len(found) > 1: + print("Ambiguous fixer name. Choose a fully qualified " + "module name instead from these:\n" + + "\n".join(" " + myf for myf in found), + file=sys.stderr) + return 2 + elif len(found) == 0: + print("Unknown fixer. 
Use --list-fixes or -l for a list.", + file=sys.stderr) + return 2 + explicit.add(found[0]) + if len(explicit & unwanted_fixes) > 0: + print("Conflicting usage: the following fixers have been " + "simultaneously requested and disallowed:\n" + + "\n".join(" " + myf for myf in (explicit & unwanted_fixes)), + file=sys.stderr) + return 2 + requested = avail_fixes.union(explicit) if all_present else explicit + else: + requested = avail_fixes.union(explicit) + fixer_names = (requested | extra_fixes) - unwanted_fixes + + input_base_dir = os.path.commonprefix(args) + if (input_base_dir and not input_base_dir.endswith(os.sep) + and not os.path.isdir(input_base_dir)): + # One or more similar names were passed, their directory is the base. + # os.path.commonprefix() is ignorant of path elements, this corrects + # for that weird API. + input_base_dir = os.path.dirname(input_base_dir) + if options.output_dir: + input_base_dir = input_base_dir.rstrip(os.sep) + logger.info('Output in %r will mirror the input directory %r layout.', + options.output_dir, input_base_dir) + + # Initialize the refactoring tool + if future.utils.PY26: + extra_kwargs = {} + else: + extra_kwargs = { + 'append_suffix': options.add_suffix, + 'output_dir': options.output_dir, + 'input_base_dir': input_base_dir, + } + + rt = StdoutRefactoringTool( + sorted(fixer_names), flags, sorted(explicit), + options.nobackups, not options.no_diffs, + **extra_kwargs) + + # Refactor all files and directories passed as arguments + if not rt.errors: + if refactor_stdin: + rt.refactor_stdin() + else: + try: + rt.refactor(args, options.write, None, + options.processes) + except refactor.MultiprocessingUnsupported: + assert options.processes > 1 + print("Sorry, -j isn't " \ + "supported on this platform.", file=sys.stderr) + return 1 + rt.summarize() + + # Return error status (0 if rt.errors is zero) + return int(bool(rt.errors)) diff --git a/src/libpasteurize/__init__.py b/src/libpasteurize/__init__.py new file mode 100644 
index 00000000..4cb1cbcd --- /dev/null +++ b/src/libpasteurize/__init__.py @@ -0,0 +1 @@ +# empty to make this a package diff --git a/src/libpasteurize/fixes/__init__.py b/src/libpasteurize/fixes/__init__.py new file mode 100644 index 00000000..905aec47 --- /dev/null +++ b/src/libpasteurize/fixes/__init__.py @@ -0,0 +1,54 @@ +import sys +from lib2to3 import refactor + +# The original set of these fixes comes from lib3to2 (https://bitbucket.org/amentajo/lib3to2): +fix_names = set([ + 'libpasteurize.fixes.fix_add_all__future__imports', # from __future__ import absolute_import etc. on separate lines + 'libpasteurize.fixes.fix_add_future_standard_library_import', # we force adding this import for now, even if it doesn't seem necessary to the fix_future_standard_library fixer, for ease of testing + # 'libfuturize.fixes.fix_order___future__imports', # consolidates to a single line to simplify testing -- UNFINISHED + 'libpasteurize.fixes.fix_future_builtins', # adds "from future.builtins import *" + 'libfuturize.fixes.fix_future_standard_library', # adds "from future import standard_library" + + 'libpasteurize.fixes.fix_annotations', + # 'libpasteurize.fixes.fix_bitlength', # ints have this in Py2.7 + # 'libpasteurize.fixes.fix_bool', # need a decorator or Mixin + # 'libpasteurize.fixes.fix_bytes', # leave bytes as bytes + # 'libpasteurize.fixes.fix_classdecorator', # available in + # Py2.6+ + # 'libpasteurize.fixes.fix_collections', hmmm ... + # 'libpasteurize.fixes.fix_dctsetcomp', # avail in Py27 + 'libpasteurize.fixes.fix_division', # yes + # 'libpasteurize.fixes.fix_except', # avail in Py2.6+ + # 'libpasteurize.fixes.fix_features', # ? 
+ 'libpasteurize.fixes.fix_fullargspec', + # 'libpasteurize.fixes.fix_funcattrs', + 'libpasteurize.fixes.fix_getcwd', + 'libpasteurize.fixes.fix_imports', # adds "from future import standard_library" + 'libpasteurize.fixes.fix_imports2', + # 'libpasteurize.fixes.fix_input', + # 'libpasteurize.fixes.fix_int', + # 'libpasteurize.fixes.fix_intern', + # 'libpasteurize.fixes.fix_itertools', + 'libpasteurize.fixes.fix_kwargs', # yes, we want this + # 'libpasteurize.fixes.fix_memoryview', + # 'libpasteurize.fixes.fix_metaclass', # write a custom handler for + # this + # 'libpasteurize.fixes.fix_methodattrs', # __func__ and __self__ seem to be defined on Py2.7 already + 'libpasteurize.fixes.fix_newstyle', # yes, we want this: explicit inheritance from object. Without new-style classes in Py2, super() will break etc. + # 'libpasteurize.fixes.fix_next', # use a decorator for this + # 'libpasteurize.fixes.fix_numliterals', # prob not + # 'libpasteurize.fixes.fix_open', # huh? + # 'libpasteurize.fixes.fix_print', # no way + 'libpasteurize.fixes.fix_printfunction', # adds __future__ import print_function + # 'libpasteurize.fixes.fix_raise_', # TODO: get this working! 
+ + # 'libpasteurize.fixes.fix_range', # nope + # 'libpasteurize.fixes.fix_reduce', + # 'libpasteurize.fixes.fix_setliteral', + # 'libpasteurize.fixes.fix_str', + # 'libpasteurize.fixes.fix_super', # maybe, if our magic super() isn't robust enough + 'libpasteurize.fixes.fix_throw', # yes, if Py3 supports it + # 'libpasteurize.fixes.fix_unittest', + 'libpasteurize.fixes.fix_unpacking', # yes, this is useful + # 'libpasteurize.fixes.fix_with' # way out of date + ]) diff --git a/libfuturize/fixes3/feature_base.py b/src/libpasteurize/fixes/feature_base.py similarity index 99% rename from libfuturize/fixes3/feature_base.py rename to src/libpasteurize/fixes/feature_base.py index 8a264964..c36d9a95 100644 --- a/libfuturize/fixes3/feature_base.py +++ b/src/libpasteurize/fixes/feature_base.py @@ -40,7 +40,7 @@ def update_mapping(self): Called every time we care about the mapping of names to features. """ self.mapping = dict([(f.name, f) for f in iter(self)]) - + @property def PATTERN(self): u""" diff --git a/libfuturize/fixes2/fix_add__future__imports.py b/src/libpasteurize/fixes/fix_add_all__future__imports.py similarity index 68% rename from libfuturize/fixes2/fix_add__future__imports.py rename to src/libpasteurize/fixes/fix_add_all__future__imports.py index b8f21307..a151f9f1 100644 --- a/libfuturize/fixes2/fix_add__future__imports.py +++ b/src/libpasteurize/fixes/fix_add_all__future__imports.py @@ -1,7 +1,10 @@ """ Fixer for adding: - from __future__ import (absolute_import, division, print_function, unicode_literals) + from __future__ import absolute_import + from __future__ import division + from __future__ import print_function + from __future__ import unicode_literals This is done when converting from Py3 to both Py3/Py2. 
""" @@ -9,14 +12,13 @@ from lib2to3 import fixer_base from libfuturize.fixer_util import future_import -class FixAddFutureImports(fixer_base.BaseFix): +class FixAddAllFutureImports(fixer_base.BaseFix): BM_compatible = True PATTERN = "file_input" run_order = 1 def transform(self, node, results): - future_import(u"unicode_literals", node) - future_import(u"print_function", node) - future_import(u"division", node) future_import(u"absolute_import", node) - + future_import(u"division", node) + future_import(u"print_function", node) + future_import(u"unicode_literals", node) diff --git a/src/libpasteurize/fixes/fix_add_all_future_builtins.py b/src/libpasteurize/fixes/fix_add_all_future_builtins.py new file mode 100644 index 00000000..22911bad --- /dev/null +++ b/src/libpasteurize/fixes/fix_add_all_future_builtins.py @@ -0,0 +1,37 @@ +""" +For the ``future`` package. + +Adds this import line:: + + from builtins import (ascii, bytes, chr, dict, filter, hex, input, + int, list, map, next, object, oct, open, pow, + range, round, str, super, zip) + +to a module, irrespective of whether each definition is used. + +Adds these imports after any other imports (in an initial block of them). 
+""" + +from __future__ import unicode_literals + +from lib2to3 import fixer_base + +from libfuturize.fixer_util import touch_import_top + + +class FixAddAllFutureBuiltins(fixer_base.BaseFix): + BM_compatible = True + PATTERN = "file_input" + run_order = 1 + + def transform(self, node, results): + # import_str = """(ascii, bytes, chr, dict, filter, hex, input, + # int, list, map, next, object, oct, open, pow, + # range, round, str, super, zip)""" + touch_import_top(u'builtins', '*', node) + + # builtins = """ascii bytes chr dict filter hex input + # int list map next object oct open pow + # range round str super zip""" + # for builtin in sorted(builtins.split(), reverse=True): + # touch_import_top(u'builtins', builtin, node) diff --git a/libfuturize/fixes2/fix_add_future_standard_library_import.py b/src/libpasteurize/fixes/fix_add_future_standard_library_import.py similarity index 71% rename from libfuturize/fixes2/fix_add_future_standard_library_import.py rename to src/libpasteurize/fixes/fix_add_future_standard_library_import.py index 5c077f19..0778406a 100644 --- a/libfuturize/fixes2/fix_add_future_standard_library_import.py +++ b/src/libpasteurize/fixes/fix_add_future_standard_library_import.py @@ -1,11 +1,12 @@ """ For the ``future`` package. -Also adds this import line: +Adds this import line: from future import standard_library -after any __future__ imports but before any other imports. +after any __future__ imports but before any other imports. Doesn't actually +change the imports to Py3 style. """ from lib2to3 import fixer_base @@ -19,4 +20,4 @@ class FixAddFutureStandardLibraryImport(fixer_base.BaseFix): def transform(self, node, results): # TODO: add a blank line between any __future__ imports and this? 
touch_import_top(u'future', u'standard_library', node) - + # TODO: also add standard_library.install_hooks() diff --git a/libfuturize/fixes3/fix_annotations.py b/src/libpasteurize/fixes/fix_annotations.py similarity index 99% rename from libfuturize/fixes3/fix_annotations.py rename to src/libpasteurize/fixes/fix_annotations.py index 1926288c..884b6741 100644 --- a/libfuturize/fixes3/fix_annotations.py +++ b/src/libpasteurize/fixes/fix_annotations.py @@ -19,7 +19,7 @@ def warn_once(self, node, reason): if not self.warned: self.warned = True self.warning(node, reason=reason) - + PATTERN = u""" funcdef< 'def' any parameters< '(' [params=any] ')' > ['->' ret=any] ':' any* > """ diff --git a/libfuturize/fixes3/fix_division.py b/src/libpasteurize/fixes/fix_division.py similarity index 100% rename from libfuturize/fixes3/fix_division.py rename to src/libpasteurize/fixes/fix_division.py diff --git a/libfuturize/fixes3/fix_features.py b/src/libpasteurize/fixes/fix_features.py similarity index 99% rename from libfuturize/fixes3/fix_features.py rename to src/libpasteurize/fixes/fix_features.py index 7e5f545a..52630f98 100644 --- a/libfuturize/fixes3/fix_features.py +++ b/src/libpasteurize/fixes/fix_features.py @@ -71,7 +71,7 @@ def match(self, node): # if it's there, so we don't care if it fails for normal reasons. pass return to_ret - + def transform(self, node, results): for feature_name in results: if feature_name in self.features_warned: diff --git a/libfuturize/fixes3/fix_fullargspec.py b/src/libpasteurize/fixes/fix_fullargspec.py similarity index 98% rename from libfuturize/fixes3/fix_fullargspec.py rename to src/libpasteurize/fixes/fix_fullargspec.py index 489295f7..4bd37e15 100644 --- a/libfuturize/fixes3/fix_fullargspec.py +++ b/src/libpasteurize/fixes/fix_fullargspec.py @@ -8,7 +8,7 @@ warn_msg = u"some of the values returned by getfullargspec are not valid in Python 2 and have no equivalent." 
class FixFullargspec(fixer_base.BaseFix): - + PATTERN = u"'getfullargspec'" def transform(self, node, results): diff --git a/libfuturize/fixes2/fix_future_builtins.py b/src/libpasteurize/fixes/fix_future_builtins.py similarity index 62% rename from libfuturize/fixes2/fix_future_builtins.py rename to src/libpasteurize/fixes/fix_future_builtins.py index e9b14c81..68496799 100644 --- a/libfuturize/fixes2/fix_future_builtins.py +++ b/src/libpasteurize/fixes/fix_future_builtins.py @@ -1,14 +1,9 @@ """ -For the ``future`` package. +Adds this import line: -Adds this import line:: + from builtins import XYZ - from future.builtins import XYZ - -for each of the functions XYZ that is used in the module from those in -future.builtins. - -Adds these imports after any other imports (in an initial block of them). +for each of the functions XYZ that is used in the module. """ from __future__ import unicode_literals @@ -21,21 +16,24 @@ # All builtins are: # from future.builtins.iterators import (filter, map, zip) -# from future.builtins.misc import (ascii, chr, hex, input, isinstance, oct, open) -# from future.builtins.backports import (bytes, int, range, round, str, super) +# from future.builtins.misc import (ascii, chr, hex, input, isinstance, oct, open, round, super) +# from future.types import (bytes, dict, int, range, str) # We don't need isinstance any more. replaced_builtins = '''filter map zip - ascii chr hex input oct open - bytes int range round str super'''.split() + ascii chr hex input next oct open round super + bytes dict int range str'''.split() expression = '|'.join(["name='{0}'".format(name) for name in replaced_builtins]) class FixFutureBuiltins(fixer_base.BaseFix): BM_compatible = True - run_order = 8 + run_order = 9 + # Currently we only match uses as a function. This doesn't match e.g.: + # if isinstance(s, str): + # ... 
PATTERN = """ power< ({0}) trailer< '(' args=[any] ')' > @@ -44,6 +42,5 @@ class FixFutureBuiltins(fixer_base.BaseFix): def transform(self, node, results): name = results["name"] - touch_import_top(u'future.builtins', name.value, node) + touch_import_top(u'builtins', name.value, node) # name.replace(Name(u"input", prefix=name.prefix)) - diff --git a/libfuturize/fixes3/fix_getcwd.py b/src/libpasteurize/fixes/fix_getcwd.py similarity index 100% rename from libfuturize/fixes3/fix_getcwd.py rename to src/libpasteurize/fixes/fix_getcwd.py diff --git a/libfuturize/fixes3/fix_imports.py b/src/libpasteurize/fixes/fix_imports.py similarity index 97% rename from libfuturize/fixes3/fix_imports.py rename to src/libpasteurize/fixes/fix_imports.py index d79558ce..b18ecf3d 100644 --- a/libfuturize/fixes3/fix_imports.py +++ b/src/libpasteurize/fixes/fix_imports.py @@ -16,6 +16,7 @@ u"winreg": u"_winreg", u"configparser": u"ConfigParser", u"copyreg": u"copy_reg", + u"multiprocessing.SimpleQueue": u"multiprocessing.queues.SimpleQueue", u"queue": u"Queue", u"socketserver": u"SocketServer", u"_markupbase": u"markupbase", @@ -40,6 +41,7 @@ u"tkinter.colorchooser": u"tkColorChooser", u"tkinter.commondialog": u"tkCommonDialog", u"tkinter.font": u"tkFont", + u"tkinter.ttk": u"ttk", u"tkinter.messagebox": u"tkMessageBox", u"tkinter.turtle": u"turtle", u"urllib.robotparser": u"robotparser", @@ -109,4 +111,3 @@ class FixImports(fixer_base.BaseFix): def transform(self, node, results): touch_import_top(u'future', u'standard_library', node) - diff --git a/libfuturize/fixes3/fix_imports2.py b/src/libpasteurize/fixes/fix_imports2.py similarity index 98% rename from libfuturize/fixes3/fix_imports2.py rename to src/libpasteurize/fixes/fix_imports2.py index 1880d714..70444e9e 100644 --- a/libfuturize/fixes3/fix_imports2.py +++ b/src/libpasteurize/fixes/fix_imports2.py @@ -4,7 +4,6 @@ from lib2to3 import fixer_base from lib2to3.fixer_util import Name, String, FromImport, Newline, Comma -from 
..fixer_util import token, syms, Leaf, Node, Star, indentation, ImportAsName from libfuturize.fixer_util import touch_import_top @@ -19,11 +18,11 @@ u'RADIOBUTTON', u'RAISED', u'READABLE', u'RIDGE', u'RIGHT', u'ROUND', u'S', u'SCROLL', u'SE', u'SEL', u'SEL_FIRST', u'SEL_LAST', u'SEPARATOR', u'SINGLE', u'SOLID', u'SUNKEN', u'SW', u'StringTypes', - u'TOP', u'TRUE', u'TclVersion', u'TkVersion', u'UNDERLINE', + u'TOP', u'TRUE', u'TclVersion', u'TkVersion', u'UNDERLINE', u'UNITS', u'VERTICAL', u'W', u'WORD', u'WRITABLE', u'X', u'Y', u'YES', u'wantobjects') -PY2MODULES = { +PY2MODULES = { u'urllib2' : ( u'AbstractBasicAuthHandler', u'AbstractDigestAuthHandler', u'AbstractHTTPHandler', u'BaseHandler', u'CacheFTPHandler', @@ -173,4 +172,3 @@ class FixImports2(fixer_base.BaseFix): def transform(self, node, results): touch_import_top(u'future', u'standard_library', node) - diff --git a/libfuturize/fixes3/fix_kwargs.py b/src/libpasteurize/fixes/fix_kwargs.py similarity index 99% rename from libfuturize/fixes3/fix_kwargs.py rename to src/libpasteurize/fixes/fix_kwargs.py index 59a3043b..290f991e 100644 --- a/libfuturize/fixes3/fix_kwargs.py +++ b/src/libpasteurize/fixes/fix_kwargs.py @@ -61,7 +61,7 @@ def remove_params(raw_params, kwargs_default=_kwargs_default_name): return False else: return True - + def needs_fixing(raw_params, kwargs_default=_kwargs_default_name): u""" Returns string with the name of the kwargs dict if the params after the first star need fixing @@ -145,4 +145,3 @@ def transform(self, node, results): arglist.append_child(Comma()) arglist.append_child(DoubleStar(prefix=u" ")) arglist.append_child(Name(new_kwargs)) - diff --git a/libfuturize/fixes3/fix_memoryview.py b/src/libpasteurize/fixes/fix_memoryview.py similarity index 100% rename from libfuturize/fixes3/fix_memoryview.py rename to src/libpasteurize/fixes/fix_memoryview.py diff --git a/libfuturize/fixes3/fix_metaclass.py b/src/libpasteurize/fixes/fix_metaclass.py similarity index 99% rename from 
libfuturize/fixes3/fix_metaclass.py rename to src/libpasteurize/fixes/fix_metaclass.py index 5e6e64d8..52dd1d14 100644 --- a/libfuturize/fixes3/fix_metaclass.py +++ b/src/libpasteurize/fixes/fix_metaclass.py @@ -61,7 +61,7 @@ def transform(self, node, results): name = meta name.prefix = u" " stmt_node = Node(syms.atom, [target, equal, name]) - + suitify(node) for item in node.children: if item.type == syms.suite: diff --git a/src/libpasteurize/fixes/fix_newstyle.py b/src/libpasteurize/fixes/fix_newstyle.py new file mode 100644 index 00000000..cc6b3adc --- /dev/null +++ b/src/libpasteurize/fixes/fix_newstyle.py @@ -0,0 +1,33 @@ +u""" +Fixer for "class Foo: ..." -> "class Foo(object): ..." +""" + +from lib2to3 import fixer_base +from lib2to3.fixer_util import LParen, RParen, Name + +from libfuturize.fixer_util import touch_import_top + + +def insert_object(node, idx): + node.insert_child(idx, RParen()) + node.insert_child(idx, Name(u"object")) + node.insert_child(idx, LParen()) + +class FixNewstyle(fixer_base.BaseFix): + + # Match: + # class Blah: + # and: + # class Blah(): + + PATTERN = u"classdef< 'class' NAME ['(' ')'] colon=':' any >" + + def transform(self, node, results): + colon = results[u"colon"] + idx = node.children.index(colon) + if (node.children[idx-2].value == '(' and + node.children[idx-1].value == ')'): + del node.children[idx-2:idx] + idx -= 2 + insert_object(node, idx) + touch_import_top(u'builtins', 'object', node) diff --git a/libfuturize/fixes3/fix_next.py b/src/libpasteurize/fixes/fix_next.py similarity index 100% rename from libfuturize/fixes3/fix_next.py rename to src/libpasteurize/fixes/fix_next.py diff --git a/libfuturize/fixes3/fix_printfunction.py b/src/libpasteurize/fixes/fix_printfunction.py similarity index 100% rename from libfuturize/fixes3/fix_printfunction.py rename to src/libpasteurize/fixes/fix_printfunction.py diff --git a/libfuturize/fixes3/fix_raise.py b/src/libpasteurize/fixes/fix_raise.py similarity index 100% rename from 
libfuturize/fixes3/fix_raise.py rename to src/libpasteurize/fixes/fix_raise.py diff --git a/src/libpasteurize/fixes/fix_raise_.py b/src/libpasteurize/fixes/fix_raise_.py new file mode 100644 index 00000000..0f020c45 --- /dev/null +++ b/src/libpasteurize/fixes/fix_raise_.py @@ -0,0 +1,35 @@ +u"""Fixer for + raise E(V).with_traceback(T) + to: + from future.utils import raise_ + ... + raise_(E, V, T) + +TODO: FIXME!! + +""" + +from lib2to3 import fixer_base +from lib2to3.fixer_util import Comma, Node, Leaf, token, syms + +class FixRaise(fixer_base.BaseFix): + + PATTERN = u""" + raise_stmt< 'raise' (power< name=any [trailer< '(' val=any* ')' >] + [trailer< '.' 'with_traceback' > trailer< '(' trc=any ')' >] > | any) ['from' chain=any] >""" + + def transform(self, node, results): + FIXME + name, val, trc = (results.get(u"name"), results.get(u"val"), results.get(u"trc")) + chain = results.get(u"chain") + if chain is not None: + self.warning(node, u"explicit exception chaining is not supported in Python 2") + chain.prev_sibling.remove() + chain.remove() + if trc is not None: + val = val[0] if val else Leaf(token.NAME, u"None") + val.prefix = trc.prefix = u" " + kids = [Leaf(token.NAME, u"raise"), name.clone(), Comma(), + val.clone(), Comma(), trc.clone()] + raise_stmt = Node(syms.raise_stmt, kids) + node.replace(raise_stmt) diff --git a/libfuturize/fixes3/fix_throw.py b/src/libpasteurize/fixes/fix_throw.py similarity index 100% rename from libfuturize/fixes3/fix_throw.py rename to src/libpasteurize/fixes/fix_throw.py diff --git a/libfuturize/fixes3/fix_unpacking.py b/src/libpasteurize/fixes/fix_unpacking.py similarity index 94% rename from libfuturize/fixes3/fix_unpacking.py rename to src/libpasteurize/fixes/fix_unpacking.py index 00c067c5..6e839e6b 100644 --- a/libfuturize/fixes3/fix_unpacking.py +++ b/src/libpasteurize/fixes/fix_unpacking.py @@ -8,8 +8,8 @@ from itertools import count from lib2to3.fixer_util import (Assign, Comma, Call, Newline, Name, Number, token, 
syms, Node, Leaf) -from ..fixer_util import indentation, suitify, commatize -# from ..fixer_util import Assign, Comma, Call, Newline, Name, Number, indentation, suitify, commatize, token, syms, Node, Leaf +from libfuturize.fixer_util import indentation, suitify, commatize +# from libfuturize.fixer_util import Assign, Comma, Call, Newline, Name, Number, indentation, suitify, commatize, token, syms, Node, Leaf def assignment_source(num_pre, num_post, LISTNAME, ITERNAME): u""" @@ -18,8 +18,12 @@ def assignment_source(num_pre, num_post, LISTNAME, ITERNAME): Returns a source fit for Assign() from fixer_util """ children = [] - pre = unicode(num_pre) - post = unicode(num_post) + try: + pre = unicode(num_pre) + post = unicode(num_post) + except NameError: + pre = str(num_pre) + post = str(num_post) # This code builds the assignment source from lib2to3 tree primitives. # It's not very readable, but it seems like the most correct way to do it. if num_pre > 0: @@ -60,7 +64,7 @@ def fix_explicit_context(self, node, results): setup_line = Assign(Name(self.LISTNAME), Call(Name(u"list"), [source.clone()])) power_line = Assign(target, assignment_source(len(pre), len(post), self.LISTNAME, self.ITERNAME)) return setup_line, power_line - + def fix_implicit_context(self, node, results): u""" Only example of the implicit context is diff --git a/src/libpasteurize/main.py b/src/libpasteurize/main.py new file mode 100644 index 00000000..4179174b --- /dev/null +++ b/src/libpasteurize/main.py @@ -0,0 +1,204 @@ +""" +pasteurize: automatic conversion of Python 3 code to clean 2/3 code +=================================================================== + +``pasteurize`` attempts to convert existing Python 3 code into source-compatible +Python 2 and 3 code. + +Use it like this on Python 3 code: + + $ pasteurize --verbose mypython3script.py + +This removes any Py3-only syntax (e.g. 
new metaclasses) and adds these +import lines: + + from __future__ import absolute_import + from __future__ import division + from __future__ import print_function + from __future__ import unicode_literals + from future import standard_library + standard_library.install_hooks() + from builtins import * + +To write changes to the files, use the -w flag. + +It also adds any other wrappers needed for Py2/3 compatibility. + +Note that separate stages are not available (or needed) when converting from +Python 3 with ``pasteurize`` as they are when converting from Python 2 with +``futurize``. + +The --all-imports option forces adding all ``__future__`` imports, +``builtins`` imports, and standard library aliases, even if they don't +seem necessary for the current state of each module. (This can simplify +testing, and can reduce the need to think about Py2 compatibility when editing +the code further.) + +""" + +from __future__ import (absolute_import, print_function, unicode_literals) + +import sys +import logging +import optparse +from lib2to3.main import main, warn, StdoutRefactoringTool +from lib2to3 import refactor + +from future import __version__ +from libpasteurize.fixes import fix_names + + +def main(args=None): + """Main program. + + Returns a suggested exit status (0, 1, 2). 
+ """ + # Set up option parser + parser = optparse.OptionParser(usage="pasteurize [options] file|dir ...") + parser.add_option("-V", "--version", action="store_true", + help="Report the version number of pasteurize") + parser.add_option("-a", "--all-imports", action="store_true", + help="Adds all __future__ and future imports to each module") + parser.add_option("-f", "--fix", action="append", default=[], + help="Each FIX specifies a transformation; default: all") + parser.add_option("-j", "--processes", action="store", default=1, + type="int", help="Run 2to3 concurrently") + parser.add_option("-x", "--nofix", action="append", default=[], + help="Prevent a fixer from being run.") + parser.add_option("-l", "--list-fixes", action="store_true", + help="List available transformations") + # parser.add_option("-p", "--print-function", action="store_true", + # help="Modify the grammar so that print() is a function") + parser.add_option("-v", "--verbose", action="store_true", + help="More verbose logging") + parser.add_option("--no-diffs", action="store_true", + help="Don't show diffs of the refactoring") + parser.add_option("-w", "--write", action="store_true", + help="Write back modified files") + parser.add_option("-n", "--nobackups", action="store_true", default=False, + help="Don't write backups for modified files.") + + # Parse command line arguments + refactor_stdin = False + flags = {} + options, args = parser.parse_args(args) + fixer_pkg = 'libpasteurize.fixes' + avail_fixes = fix_names + flags["print_function"] = True + + if not options.write and options.no_diffs: + warn("not writing files and not printing diffs; that's not very useful") + if not options.write and options.nobackups: + parser.error("Can't use -n without -w") + if options.version: + print(__version__) + return 0 + if options.list_fixes: + print("Available transformations for the -f/--fix option:") + for fixname in sorted(avail_fixes): + print(fixname) + if not args: + return 0 + if not args: + 
print("At least one file or directory argument required.", + file=sys.stderr) + print("Use --help to show usage.", file=sys.stderr) + return 2 + if "-" in args: + refactor_stdin = True + if options.write: + print("Can't write to stdin.", file=sys.stderr) + return 2 + + # Set up logging handler + level = logging.DEBUG if options.verbose else logging.INFO + logging.basicConfig(format='%(name)s: %(message)s', level=level) + + unwanted_fixes = set() + for fix in options.nofix: + if ".fix_" in fix: + unwanted_fixes.add(fix) + else: + # Infer the full module name for the fixer. + # First ensure that no names clash (e.g. + # lib2to3.fixes.fix_blah and libfuturize.fixes.fix_blah): + found = [f for f in avail_fixes + if f.endswith('fix_{0}'.format(fix))] + if len(found) > 1: + print("Ambiguous fixer name. Choose a fully qualified " + "module name instead from these:\n" + + "\n".join(" " + myf for myf in found), + file=sys.stderr) + return 2 + elif len(found) == 0: + print("Unknown fixer. Use --list-fixes or -l for a list.", + file=sys.stderr) + return 2 + unwanted_fixes.add(found[0]) + + extra_fixes = set() + if options.all_imports: + prefix = 'libpasteurize.fixes.' + extra_fixes.add(prefix + 'fix_add_all__future__imports') + extra_fixes.add(prefix + 'fix_add_future_standard_library_import') + extra_fixes.add(prefix + 'fix_add_all_future_builtins') + + explicit = set() + if options.fix: + all_present = False + for fix in options.fix: + if fix == 'all': + all_present = True + else: + if ".fix_" in fix: + explicit.add(fix) + else: + # Infer the full module name for the fixer. + # First ensure that no names clash (e.g. + # lib2to3.fixes.fix_blah and libpasteurize.fixes.fix_blah): + found = [f for f in avail_fixes + if f.endswith('fix_{0}'.format(fix))] + if len(found) > 1: + print("Ambiguous fixer name. 
Choose a fully qualified " + "module name instead from these:\n" + + "\n".join(" " + myf for myf in found), + file=sys.stderr) + return 2 + elif len(found) == 0: + print("Unknown fixer. Use --list-fixes or -l for a list.", + file=sys.stderr) + return 2 + explicit.add(found[0]) + if len(explicit & unwanted_fixes) > 0: + print("Conflicting usage: the following fixers have been " + "simultaneously requested and disallowed:\n" + + "\n".join(" " + myf for myf in (explicit & unwanted_fixes)), + file=sys.stderr) + return 2 + requested = avail_fixes.union(explicit) if all_present else explicit + else: + requested = avail_fixes.union(explicit) + + fixer_names = requested | extra_fixes - unwanted_fixes + + # Initialize the refactoring tool + rt = StdoutRefactoringTool(sorted(fixer_names), flags, set(), + options.nobackups, not options.no_diffs) + + # Refactor all files and directories passed as arguments + if not rt.errors: + if refactor_stdin: + rt.refactor_stdin() + else: + try: + rt.refactor(args, options.write, None, + options.processes) + except refactor.MultiprocessingUnsupported: + assert options.processes > 1 + print("Sorry, -j isn't " \ + "supported on this platform.", file=sys.stderr) + return 1 + rt.summarize() + + # Return error status (0 if rt.errors is zero) + return int(bool(rt.errors)) diff --git a/src/past/__init__.py b/src/past/__init__.py new file mode 100644 index 00000000..54619e0a --- /dev/null +++ b/src/past/__init__.py @@ -0,0 +1,90 @@ +# coding=utf-8 +""" +past: compatibility with Python 2 from Python 3 +=============================================== + +``past`` is a package to aid with Python 2/3 compatibility. Whereas ``future`` +contains backports of Python 3 constructs to Python 2, ``past`` provides +implementations of some Python 2 constructs in Python 3 and tools to import and +run Python 2 code in Python 3. It is intended to be used sparingly, as a way of +running old Python 2 code from Python 3 until the code is ported properly. 
+ +Potential uses for libraries: + +- as a step in porting a Python 2 codebase to Python 3 (e.g. with the ``futurize`` script) +- to provide Python 3 support for previously Python 2-only libraries with the + same APIs as on Python 2 -- particularly with regard to 8-bit strings (the + ``past.builtins.str`` type). +- to aid in providing minimal-effort Python 3 support for applications using + libraries that do not yet wish to upgrade their code properly to Python 3, or + wish to upgrade it gradually to Python 3 style. + + +Here are some code examples that run identically on Python 3 and 2:: + + >>> from past.builtins import str as oldstr + + >>> philosopher = oldstr(u'\u5b54\u5b50'.encode('utf-8')) + >>> # This now behaves like a Py2 byte-string on both Py2 and Py3. + >>> # For example, indexing returns a Python 2-like string object, not + >>> # an integer: + >>> philosopher[0] + '\xe5' + >>> type(philosopher[0]) + + + >>> # List-producing versions of range, reduce, map, filter + >>> from past.builtins import range, reduce + >>> range(10) + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + >>> reduce(lambda x, y: x+y, [1, 2, 3, 4, 5]) + 15 + + >>> # Other functions removed in Python 3 are resurrected ... + >>> from past.builtins import execfile + >>> execfile('myfile.py') + + >>> from past.builtins import raw_input + >>> name = raw_input('What is your name? ') + What is your name? [cursor] + + >>> from past.builtins import reload + >>> reload(mymodule) # equivalent to imp.reload(mymodule) in Python 3 + + >>> from past.builtins import xrange + >>> for i in xrange(10): + ... pass + + +It also provides import hooks so you can import and use Python 2 modules like +this:: + + $ python3 + + >>> from past.translation import autotranslate + >>> authotranslate('mypy2module') + >>> import mypy2module + +until the authors of the Python 2 modules have upgraded their code. 
Then, for +example:: + + >>> mypy2module.func_taking_py2_string(oldstr(b'abcd')) + + +Credits +------- + +:Author: Ed Schofield, Jordan M. Adler, et al +:Sponsor: Python Charmers: https://pythoncharmers.com + + +Licensing +--------- +Copyright 2013-2024 Python Charmers, Australia. +The software is distributed under an MIT licence. See LICENSE.txt. +""" + +from future import __version__, __copyright__, __license__ + +__title__ = 'past' +__author__ = 'Ed Schofield' diff --git a/src/past/builtins/__init__.py b/src/past/builtins/__init__.py new file mode 100644 index 00000000..1b19e373 --- /dev/null +++ b/src/past/builtins/__init__.py @@ -0,0 +1,72 @@ +""" +A resurrection of some old functions from Python 2 for use in Python 3. These +should be used sparingly, to help with porting efforts, since code using them +is no longer standard Python 3 code. + +This module provides the following: + +1. Implementations of these builtin functions which have no equivalent on Py3: + +- apply +- chr +- cmp +- execfile + +2. Aliases: + +- intern <- sys.intern +- raw_input <- input +- reduce <- functools.reduce +- reload <- imp.reload +- unichr <- chr +- unicode <- str +- xrange <- range + +3. List-producing versions of the corresponding Python 3 iterator-producing functions: + +- filter +- map +- range +- zip + +4. Forward-ported Py2 types: + +- basestring +- dict +- str +- long +- unicode + +""" + +from future.utils import PY3 +from past.builtins.noniterators import (filter, map, range, reduce, zip) +# from past.builtins.misc import (ascii, hex, input, oct, open) +if PY3: + from past.types import (basestring, + olddict as dict, + oldstr as str, + long, + unicode) +else: + from __builtin__ import (basestring, dict, str, long, unicode) + +from past.builtins.misc import (apply, chr, cmp, execfile, intern, oct, + raw_input, reload, unichr, unicode, xrange) +from past import utils + + +if utils.PY3: + # We only import names that shadow the builtins on Py3. 
No other namespace + # pollution on Py3. + + # Only shadow builtins on Py3; no new names + __all__ = ['filter', 'map', 'range', 'reduce', 'zip', + 'basestring', 'dict', 'str', 'long', 'unicode', + 'apply', 'chr', 'cmp', 'execfile', 'intern', 'raw_input', + 'reload', 'unichr', 'xrange' + ] + +else: + # No namespace pollution on Py2 + __all__ = [] diff --git a/src/past/builtins/misc.py b/src/past/builtins/misc.py new file mode 100644 index 00000000..0b8e6a98 --- /dev/null +++ b/src/past/builtins/misc.py @@ -0,0 +1,162 @@ +from __future__ import unicode_literals + +import inspect +import sys +import math +import numbers + +from future.utils import PY2, PY3, exec_ + + +if PY2: + from collections import Mapping +else: + from collections.abc import Mapping + +if PY3: + import builtins + from collections.abc import Mapping + + def apply(f, *args, **kw): + return f(*args, **kw) + + from past.builtins import str as oldstr + + def chr(i): + """ + Return a byte-string of one character with ordinal i; 0 <= i <= 256 + """ + return oldstr(bytes((i,))) + + def cmp(x, y): + """ + cmp(x, y) -> integer + + Return negative if xy. + Python2 had looser comparison allowing cmp None and non Numerical types and collections. 
+ Try to match the old behavior + """ + if isinstance(x, set) and isinstance(y, set): + raise TypeError('cannot compare sets using cmp()',) + try: + if isinstance(x, numbers.Number) and math.isnan(x): + if not isinstance(y, numbers.Number): + raise TypeError('cannot compare float("nan"), {type_y} with cmp'.format(type_y=type(y))) + if isinstance(y, int): + return 1 + else: + return -1 + if isinstance(y, numbers.Number) and math.isnan(y): + if not isinstance(x, numbers.Number): + raise TypeError('cannot compare {type_x}, float("nan") with cmp'.format(type_x=type(x))) + if isinstance(x, int): + return -1 + else: + return 1 + return (x > y) - (x < y) + except TypeError: + if x == y: + return 0 + type_order = [ + type(None), + numbers.Number, + dict, list, + set, + (str, bytes), + ] + x_type_index = y_type_index = None + for i, type_match in enumerate(type_order): + if isinstance(x, type_match): + x_type_index = i + if isinstance(y, type_match): + y_type_index = i + if cmp(x_type_index, y_type_index) == 0: + if isinstance(x, bytes) and isinstance(y, str): + return cmp(x.decode('ascii'), y) + if isinstance(y, bytes) and isinstance(x, str): + return cmp(x, y.decode('ascii')) + elif isinstance(x, list): + # if both arguments are lists take the comparison of the first non equal value + for x_elem, y_elem in zip(x, y): + elem_cmp_val = cmp(x_elem, y_elem) + if elem_cmp_val != 0: + return elem_cmp_val + # if all elements are equal, return equal/0 + return 0 + elif isinstance(x, dict): + if len(x) != len(y): + return cmp(len(x), len(y)) + else: + x_key = min(a for a in x if a not in y or x[a] != y[a]) + y_key = min(b for b in y if b not in x or x[b] != y[b]) + if x_key != y_key: + return cmp(x_key, y_key) + else: + return cmp(x[x_key], y[y_key]) + return cmp(x_type_index, y_type_index) + + from sys import intern + + def oct(number): + """oct(number) -> string + + Return the octal representation of an integer + """ + return '0' + builtins.oct(number)[2:] + + raw_input = input 
+ # imp was deprecated in python 3.6 + if sys.version_info >= (3, 6): + from importlib import reload + else: + # for python2, python3 <= 3.4 + from imp import reload + unicode = str + unichr = chr + xrange = range +else: + import __builtin__ + from collections import Mapping + apply = __builtin__.apply + chr = __builtin__.chr + cmp = __builtin__.cmp + execfile = __builtin__.execfile + intern = __builtin__.intern + oct = __builtin__.oct + raw_input = __builtin__.raw_input + reload = __builtin__.reload + unicode = __builtin__.unicode + unichr = __builtin__.unichr + xrange = __builtin__.xrange + + +if PY3: + def execfile(filename, myglobals=None, mylocals=None): + """ + Read and execute a Python script from a file in the given namespaces. + The globals and locals are dictionaries, defaulting to the current + globals and locals. If only globals is given, locals defaults to it. + """ + if myglobals is None: + # There seems to be no alternative to frame hacking here. + caller_frame = inspect.stack()[1] + myglobals = caller_frame[0].f_globals + mylocals = caller_frame[0].f_locals + elif mylocals is None: + # Only if myglobals is given do we set mylocals to it. 
+ mylocals = myglobals + if not isinstance(myglobals, Mapping): + raise TypeError('globals must be a mapping') + if not isinstance(mylocals, Mapping): + raise TypeError('locals must be a mapping') + with open(filename, "rb") as fin: + source = fin.read() + code = compile(source, filename, "exec") + exec_(code, myglobals, mylocals) + + +if PY3: + __all__ = ['apply', 'chr', 'cmp', 'execfile', 'intern', 'raw_input', + 'reload', 'unichr', 'unicode', 'xrange'] +else: + __all__ = [] diff --git a/src/past/builtins/noniterators.py b/src/past/builtins/noniterators.py new file mode 100644 index 00000000..183ffffd --- /dev/null +++ b/src/past/builtins/noniterators.py @@ -0,0 +1,272 @@ +""" +This module is designed to be used as follows:: + + from past.builtins.noniterators import filter, map, range, reduce, zip + +And then, for example:: + + assert isinstance(range(5), list) + +The list-producing functions this brings in are:: + +- ``filter`` +- ``map`` +- ``range`` +- ``reduce`` +- ``zip`` + +""" + +from __future__ import division, absolute_import, print_function + +from itertools import chain, starmap +import itertools # since zip_longest doesn't exist on Py2 +from past.types import basestring +from past.utils import PY3 + + +def flatmap(f, items): + return chain.from_iterable(map(f, items)) + + +if PY3: + import builtins + + # list-producing versions of the major Python iterating functions + def oldfilter(*args): + """ + filter(function or None, sequence) -> list, tuple, or string + + Return those items of sequence for which function(item) is true. + If function is None, return the items that are true. If sequence + is a tuple or string, return the same type, else return a list. + """ + mytype = type(args[1]) + if isinstance(args[1], basestring): + return mytype().join(builtins.filter(*args)) + elif isinstance(args[1], (tuple, list)): + return mytype(builtins.filter(*args)) + else: + # Fall back to list. Is this the right thing to do? 
+ return list(builtins.filter(*args)) + + # This is surprisingly difficult to get right. For example, the + # solutions here fail with the test cases in the docstring below: + # http://stackoverflow.com/questions/8072755/ + def oldmap(func, *iterables): + """ + map(function, sequence[, sequence, ...]) -> list + + Return a list of the results of applying the function to the + items of the argument sequence(s). If more than one sequence is + given, the function is called with an argument list consisting of + the corresponding item of each sequence, substituting None for + missing values when not all sequences have the same length. If + the function is None, return a list of the items of the sequence + (or a list of tuples if more than one sequence). + + Test cases: + >>> oldmap(None, 'hello world') + ['h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd'] + + >>> oldmap(None, range(4)) + [0, 1, 2, 3] + + More test cases are in test_past.test_builtins. + """ + zipped = itertools.zip_longest(*iterables) + l = list(zipped) + if len(l) == 0: + return [] + if func is None: + result = l + else: + result = list(starmap(func, l)) + + # Inspect to see whether it's a simple sequence of tuples + try: + if max([len(item) for item in result]) == 1: + return list(chain.from_iterable(result)) + # return list(flatmap(func, result)) + except TypeError as e: + # Simple objects like ints have no len() + pass + return result + + ############################ + ### For reference, the source code for Py2.7 map function: + # static PyObject * + # builtin_map(PyObject *self, PyObject *args) + # { + # typedef struct { + # PyObject *it; /* the iterator object */ + # int saw_StopIteration; /* bool: did the iterator end? 
*/ + # } sequence; + # + # PyObject *func, *result; + # sequence *seqs = NULL, *sqp; + # Py_ssize_t n, len; + # register int i, j; + # + # n = PyTuple_Size(args); + # if (n < 2) { + # PyErr_SetString(PyExc_TypeError, + # "map() requires at least two args"); + # return NULL; + # } + # + # func = PyTuple_GetItem(args, 0); + # n--; + # + # if (func == Py_None) { + # if (PyErr_WarnPy3k("map(None, ...) not supported in 3.x; " + # "use list(...)", 1) < 0) + # return NULL; + # if (n == 1) { + # /* map(None, S) is the same as list(S). */ + # return PySequence_List(PyTuple_GetItem(args, 1)); + # } + # } + # + # /* Get space for sequence descriptors. Must NULL out the iterator + # * pointers so that jumping to Fail_2 later doesn't see trash. + # */ + # if ((seqs = PyMem_NEW(sequence, n)) == NULL) { + # PyErr_NoMemory(); + # return NULL; + # } + # for (i = 0; i < n; ++i) { + # seqs[i].it = (PyObject*)NULL; + # seqs[i].saw_StopIteration = 0; + # } + # + # /* Do a first pass to obtain iterators for the arguments, and set len + # * to the largest of their lengths. + # */ + # len = 0; + # for (i = 0, sqp = seqs; i < n; ++i, ++sqp) { + # PyObject *curseq; + # Py_ssize_t curlen; + # + # /* Get iterator. */ + # curseq = PyTuple_GetItem(args, i+1); + # sqp->it = PyObject_GetIter(curseq); + # if (sqp->it == NULL) { + # static char errmsg[] = + # "argument %d to map() must support iteration"; + # char errbuf[sizeof(errmsg) + 25]; + # PyOS_snprintf(errbuf, sizeof(errbuf), errmsg, i+2); + # PyErr_SetString(PyExc_TypeError, errbuf); + # goto Fail_2; + # } + # + # /* Update len. */ + # curlen = _PyObject_LengthHint(curseq, 8); + # if (curlen > len) + # len = curlen; + # } + # + # /* Get space for the result list. */ + # if ((result = (PyObject *) PyList_New(len)) == NULL) + # goto Fail_2; + # + # /* Iterate over the sequences until all have stopped. 
*/ + # for (i = 0; ; ++i) { + # PyObject *alist, *item=NULL, *value; + # int numactive = 0; + # + # if (func == Py_None && n == 1) + # alist = NULL; + # else if ((alist = PyTuple_New(n)) == NULL) + # goto Fail_1; + # + # for (j = 0, sqp = seqs; j < n; ++j, ++sqp) { + # if (sqp->saw_StopIteration) { + # Py_INCREF(Py_None); + # item = Py_None; + # } + # else { + # item = PyIter_Next(sqp->it); + # if (item) + # ++numactive; + # else { + # if (PyErr_Occurred()) { + # Py_XDECREF(alist); + # goto Fail_1; + # } + # Py_INCREF(Py_None); + # item = Py_None; + # sqp->saw_StopIteration = 1; + # } + # } + # if (alist) + # PyTuple_SET_ITEM(alist, j, item); + # else + # break; + # } + # + # if (!alist) + # alist = item; + # + # if (numactive == 0) { + # Py_DECREF(alist); + # break; + # } + # + # if (func == Py_None) + # value = alist; + # else { + # value = PyEval_CallObject(func, alist); + # Py_DECREF(alist); + # if (value == NULL) + # goto Fail_1; + # } + # if (i >= len) { + # int status = PyList_Append(result, value); + # Py_DECREF(value); + # if (status < 0) + # goto Fail_1; + # } + # else if (PyList_SetItem(result, i, value) < 0) + # goto Fail_1; + # } + # + # if (i < len && PyList_SetSlice(result, i, len, NULL) < 0) + # goto Fail_1; + # + # goto Succeed; + # + # Fail_1: + # Py_DECREF(result); + # Fail_2: + # result = NULL; + # Succeed: + # assert(seqs); + # for (i = 0; i < n; ++i) + # Py_XDECREF(seqs[i].it); + # PyMem_DEL(seqs); + # return result; + # } + + def oldrange(*args, **kwargs): + return list(builtins.range(*args, **kwargs)) + + def oldzip(*args, **kwargs): + return list(builtins.zip(*args, **kwargs)) + + filter = oldfilter + map = oldmap + range = oldrange + from functools import reduce + zip = oldzip + __all__ = ['filter', 'map', 'range', 'reduce', 'zip'] + +else: + import __builtin__ + # Python 2-builtin ranges produce lists + filter = __builtin__.filter + map = __builtin__.map + range = __builtin__.range + reduce = __builtin__.reduce + zip = __builtin__.zip + 
__all__ = [] diff --git a/src/past/translation/__init__.py b/src/past/translation/__init__.py new file mode 100644 index 00000000..ae6c0d90 --- /dev/null +++ b/src/past/translation/__init__.py @@ -0,0 +1,453 @@ +# -*- coding: utf-8 -*- +""" +past.translation +================== + +The ``past.translation`` package provides an import hook for Python 3 which +transparently runs ``futurize`` fixers over Python 2 code on import to convert +print statements into functions, etc. + +It is intended to assist users in migrating to Python 3.x even if some +dependencies still only support Python 2.x. + +Usage +----- + +Once your Py2 package is installed in the usual module search path, the import +hook is invoked as follows: + + >>> from past.translation import autotranslate + >>> autotranslate('mypackagename') + +Or: + + >>> autotranslate(['mypackage1', 'mypackage2']) + +You can unregister the hook using:: + + >>> from past.translation import remove_hooks + >>> remove_hooks() + +Author: Ed Schofield. +Inspired by and based on ``uprefix`` by Vinay M. Sajip. +""" + +import sys +# imp was deprecated in python 3.6 +if sys.version_info >= (3, 6): + import importlib as imp +else: + import imp +import logging +import os +import copy +from lib2to3.pgen2.parse import ParseError +from lib2to3.refactor import RefactoringTool + +from libfuturize import fixes + +try: + from importlib.machinery import ( + PathFinder, + SourceFileLoader, + ) +except ImportError: + PathFinder = None + SourceFileLoader = object + +if sys.version_info[:2] < (3, 4): + import imp + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + +myfixes = (list(fixes.libfuturize_fix_names_stage1) + + list(fixes.lib2to3_fix_names_stage1) + + list(fixes.libfuturize_fix_names_stage2) + + list(fixes.lib2to3_fix_names_stage2)) + + +# We detect whether the code is Py2 or Py3 by applying certain lib2to3 fixers +# to it. If the diff is empty, it's Python 3 code. 
+ +py2_detect_fixers = [ +# From stage 1: + 'lib2to3.fixes.fix_apply', + # 'lib2to3.fixes.fix_dict', # TODO: add support for utils.viewitems() etc. and move to stage2 + 'lib2to3.fixes.fix_except', + 'lib2to3.fixes.fix_execfile', + 'lib2to3.fixes.fix_exitfunc', + 'lib2to3.fixes.fix_funcattrs', + 'lib2to3.fixes.fix_filter', + 'lib2to3.fixes.fix_has_key', + 'lib2to3.fixes.fix_idioms', + 'lib2to3.fixes.fix_import', # makes any implicit relative imports explicit. (Use with ``from __future__ import absolute_import) + 'lib2to3.fixes.fix_intern', + 'lib2to3.fixes.fix_isinstance', + 'lib2to3.fixes.fix_methodattrs', + 'lib2to3.fixes.fix_ne', + 'lib2to3.fixes.fix_numliterals', # turns 1L into 1, 0755 into 0o755 + 'lib2to3.fixes.fix_paren', + 'lib2to3.fixes.fix_print', + 'lib2to3.fixes.fix_raise', # uses incompatible with_traceback() method on exceptions + 'lib2to3.fixes.fix_renames', + 'lib2to3.fixes.fix_reduce', + # 'lib2to3.fixes.fix_set_literal', # this is unnecessary and breaks Py2.6 support + 'lib2to3.fixes.fix_repr', + 'lib2to3.fixes.fix_standarderror', + 'lib2to3.fixes.fix_sys_exc', + 'lib2to3.fixes.fix_throw', + 'lib2to3.fixes.fix_tuple_params', + 'lib2to3.fixes.fix_types', + 'lib2to3.fixes.fix_ws_comma', + 'lib2to3.fixes.fix_xreadlines', + +# From stage 2: + 'lib2to3.fixes.fix_basestring', + # 'lib2to3.fixes.fix_buffer', # perhaps not safe. Test this. + # 'lib2to3.fixes.fix_callable', # not needed in Py3.2+ + # 'lib2to3.fixes.fix_dict', # TODO: add support for utils.viewitems() etc. 
+ 'lib2to3.fixes.fix_exec', + # 'lib2to3.fixes.fix_future', # we don't want to remove __future__ imports + 'lib2to3.fixes.fix_getcwdu', + # 'lib2to3.fixes.fix_imports', # called by libfuturize.fixes.fix_future_standard_library + # 'lib2to3.fixes.fix_imports2', # we don't handle this yet (dbm) + # 'lib2to3.fixes.fix_input', + # 'lib2to3.fixes.fix_itertools', + # 'lib2to3.fixes.fix_itertools_imports', + 'lib2to3.fixes.fix_long', + # 'lib2to3.fixes.fix_map', + # 'lib2to3.fixes.fix_metaclass', # causes SyntaxError in Py2! Use the one from ``six`` instead + 'lib2to3.fixes.fix_next', + 'lib2to3.fixes.fix_nonzero', # TODO: add a decorator for mapping __bool__ to __nonzero__ + # 'lib2to3.fixes.fix_operator', # we will need support for this by e.g. extending the Py2 operator module to provide those functions in Py3 + 'lib2to3.fixes.fix_raw_input', + # 'lib2to3.fixes.fix_unicode', # strips off the u'' prefix, which removes a potentially helpful source of information for disambiguating unicode/byte strings + # 'lib2to3.fixes.fix_urllib', + 'lib2to3.fixes.fix_xrange', + # 'lib2to3.fixes.fix_zip', +] + + +class RTs: + """ + A namespace for the refactoring tools. This avoids creating these at + the module level, which slows down the module import. (See issue #117). + + There are two possible grammars: with or without the print statement. + Hence we have two possible refactoring tool implementations. + """ + _rt = None + _rtp = None + _rt_py2_detect = None + _rtp_py2_detect = None + + @staticmethod + def setup(): + """ + Call this before using the refactoring tools to create them on demand + if needed. + """ + if None in [RTs._rt, RTs._rtp]: + RTs._rt = RefactoringTool(myfixes) + RTs._rtp = RefactoringTool(myfixes, {'print_function': True}) + + + @staticmethod + def setup_detect_python2(): + """ + Call this before using the refactoring tools to create them on demand + if needed. 
+ """ + if None in [RTs._rt_py2_detect, RTs._rtp_py2_detect]: + RTs._rt_py2_detect = RefactoringTool(py2_detect_fixers) + RTs._rtp_py2_detect = RefactoringTool(py2_detect_fixers, + {'print_function': True}) + + +# We need to find a prefix for the standard library, as we don't want to +# process any files there (they will already be Python 3). +# +# The following method is used by Sanjay Vinip in uprefix. This fails for +# ``conda`` environments: +# # In a non-pythonv virtualenv, sys.real_prefix points to the installed Python. +# # In a pythonv venv, sys.base_prefix points to the installed Python. +# # Outside a virtual environment, sys.prefix points to the installed Python. + +# if hasattr(sys, 'real_prefix'): +# _syslibprefix = sys.real_prefix +# else: +# _syslibprefix = getattr(sys, 'base_prefix', sys.prefix) + +# Instead, we use the portion of the path common to both the stdlib modules +# ``math`` and ``urllib``. + +def splitall(path): + """ + Split a path into all components. From Python Cookbook. + """ + allparts = [] + while True: + parts = os.path.split(path) + if parts[0] == path: # sentinel for absolute paths + allparts.insert(0, parts[0]) + break + elif parts[1] == path: # sentinel for relative paths + allparts.insert(0, parts[1]) + break + else: + path = parts[0] + allparts.insert(0, parts[1]) + return allparts + + +def common_substring(s1, s2): + """ + Returns the longest common substring to the two strings, starting from the + left. 
+ """ + chunks = [] + path1 = splitall(s1) + path2 = splitall(s2) + for (dir1, dir2) in zip(path1, path2): + if dir1 != dir2: + break + chunks.append(dir1) + return os.path.join(*chunks) + +# _stdlibprefix = common_substring(math.__file__, urllib.__file__) + + +def detect_python2(source, pathname): + """ + Returns a bool indicating whether we think the code is Py2 + """ + RTs.setup_detect_python2() + try: + tree = RTs._rt_py2_detect.refactor_string(source, pathname) + except ParseError as e: + if e.msg != 'bad input' or e.value != '=': + raise + tree = RTs._rtp.refactor_string(source, pathname) + + if source != str(tree)[:-1]: # remove added newline + # The above fixers made changes, so we conclude it's Python 2 code + logger.debug('Detected Python 2 code: {0}'.format(pathname)) + return True + else: + logger.debug('Detected Python 3 code: {0}'.format(pathname)) + return False + + +def transform(source, pathname): + # This implementation uses lib2to3, + # you can override and use something else + # if that's better for you + + # lib2to3 likes a newline at the end + RTs.setup() + source += '\n' + try: + tree = RTs._rt.refactor_string(source, pathname) + except ParseError as e: + if e.msg != 'bad input' or e.value != '=': + raise + tree = RTs._rtp.refactor_string(source, pathname) + # could optimise a bit for only doing str(tree) if + # getattr(tree, 'was_changed', False) returns True + return str(tree)[:-1] # remove added newline + + +class PastSourceFileLoader(SourceFileLoader): + exclude_paths = [] + include_paths = [] + + def _convert_needed(self): + fullname = self.name + if any(fullname.startswith(path) for path in self.exclude_paths): + convert = False + elif any(fullname.startswith(path) for path in self.include_paths): + convert = True + else: + convert = False + return convert + + def _exec_transformed_module(self, module): + source = self.get_source(self.name) + pathname = self.path + if detect_python2(source, pathname): + source = transform(source, 
pathname) + code = compile(source, pathname, "exec") + exec(code, module.__dict__) + + # For Python 3.3 + def load_module(self, fullname): + logger.debug("Running load_module for %s", fullname) + if fullname in sys.modules: + mod = sys.modules[fullname] + else: + if self._convert_needed(): + logger.debug("Autoconverting %s", fullname) + mod = imp.new_module(fullname) + sys.modules[fullname] = mod + + # required by PEP 302 + mod.__file__ = self.path + mod.__loader__ = self + if self.is_package(fullname): + mod.__path__ = [] + mod.__package__ = fullname + else: + mod.__package__ = fullname.rpartition('.')[0] + self._exec_transformed_module(mod) + else: + mod = super().load_module(fullname) + return mod + + # For Python >=3.4 + def exec_module(self, module): + logger.debug("Running exec_module for %s", module) + if self._convert_needed(): + logger.debug("Autoconverting %s", self.name) + self._exec_transformed_module(module) + else: + super().exec_module(module) + + +class Py2Fixer(object): + """ + An import hook class that uses lib2to3 for source-to-source translation of + Py2 code to Py3. + """ + + # See the comments on :class:future.standard_library.RenameImport. + # We add this attribute here so remove_hooks() and install_hooks() can + # unambiguously detect whether the import hook is installed: + PY2FIXER = True + + def __init__(self): + self.found = None + self.base_exclude_paths = ['future', 'past'] + self.exclude_paths = copy.copy(self.base_exclude_paths) + self.include_paths = [] + + def include(self, paths): + """ + Pass in a sequence of module names such as 'plotrique.plotting' that, + if present at the leftmost side of the full package name, would + specify the module to be transformed from Py2 to Py3. + """ + self.include_paths += paths + + def exclude(self, paths): + """ + Pass in a sequence of strings such as 'mymodule' that, if + present at the leftmost side of the full package name, would cause + the module not to undergo any source transformation. 
+ """ + self.exclude_paths += paths + + # For Python 3.3 + def find_module(self, fullname, path=None): + logger.debug("Running find_module: (%s, %s)", fullname, path) + loader = PathFinder.find_module(fullname, path) + if not loader: + logger.debug("Py2Fixer could not find %s", fullname) + return None + loader.__class__ = PastSourceFileLoader + loader.exclude_paths = self.exclude_paths + loader.include_paths = self.include_paths + return loader + + # For Python >=3.4 + def find_spec(self, fullname, path=None, target=None): + logger.debug("Running find_spec: (%s, %s, %s)", fullname, path, target) + spec = PathFinder.find_spec(fullname, path, target) + if not spec: + logger.debug("Py2Fixer could not find %s", fullname) + return None + spec.loader.__class__ = PastSourceFileLoader + spec.loader.exclude_paths = self.exclude_paths + spec.loader.include_paths = self.include_paths + return spec + + +_hook = Py2Fixer() + + +def install_hooks(include_paths=(), exclude_paths=()): + if isinstance(include_paths, str): + include_paths = (include_paths,) + if isinstance(exclude_paths, str): + exclude_paths = (exclude_paths,) + assert len(include_paths) + len(exclude_paths) > 0, 'Pass at least one argument' + _hook.include(include_paths) + _hook.exclude(exclude_paths) + # _hook.debug = debug + enable = sys.version_info[0] >= 3 # enabled for all 3.x+ + if enable and _hook not in sys.meta_path: + sys.meta_path.insert(0, _hook) # insert at beginning. This could be made a parameter + + # We could return the hook when there are ways of configuring it + #return _hook + + +def remove_hooks(): + if _hook in sys.meta_path: + sys.meta_path.remove(_hook) + + +def detect_hooks(): + """ + Returns True if the import hooks are installed, False if not. + """ + return _hook in sys.meta_path + # present = any([hasattr(hook, 'PY2FIXER') for hook in sys.meta_path]) + # return present + + +class hooks(object): + """ + Acts as a context manager. 
Use like this: + + >>> from past import translation + >>> with translation.hooks(): + ... import mypy2module + >>> import requests # py2/3 compatible anyway + >>> # etc. + """ + def __enter__(self): + self.hooks_were_installed = detect_hooks() + install_hooks() + return self + + def __exit__(self, *args): + if not self.hooks_were_installed: + remove_hooks() + + +class suspend_hooks(object): + """ + Acts as a context manager. Use like this: + + >>> from past import translation + >>> translation.install_hooks() + >>> import http.client + >>> # ... + >>> with translation.suspend_hooks(): + >>> import requests # or others that support Py2/3 + + If the hooks were disabled before the context, they are not installed when + the context is left. + """ + def __enter__(self): + self.hooks_were_installed = detect_hooks() + remove_hooks() + return self + def __exit__(self, *args): + if self.hooks_were_installed: + install_hooks() + + +# alias +autotranslate = install_hooks diff --git a/src/past/types/__init__.py b/src/past/types/__init__.py new file mode 100644 index 00000000..91dd270f --- /dev/null +++ b/src/past/types/__init__.py @@ -0,0 +1,29 @@ +""" +Forward-ports of types from Python 2 for use with Python 3: + +- ``basestring``: equivalent to ``(str, bytes)`` in ``isinstance`` checks +- ``dict``: with list-producing .keys() etc. 
methods +- ``str``: bytes-like, but iterating over them doesn't product integers +- ``long``: alias of Py3 int with ``L`` suffix in the ``repr`` +- ``unicode``: alias of Py3 str with ``u`` prefix in the ``repr`` + +""" + +from past import utils + +if utils.PY2: + import __builtin__ + basestring = __builtin__.basestring + dict = __builtin__.dict + str = __builtin__.str + long = __builtin__.long + unicode = __builtin__.unicode + __all__ = [] +else: + from .basestring import basestring + from .olddict import olddict + from .oldstr import oldstr + long = int + unicode = str + # from .unicode import unicode + __all__ = ['basestring', 'olddict', 'oldstr', 'long', 'unicode'] diff --git a/src/past/types/basestring.py b/src/past/types/basestring.py new file mode 100644 index 00000000..9c21715a --- /dev/null +++ b/src/past/types/basestring.py @@ -0,0 +1,38 @@ +""" +An implementation of the basestring type for Python 3 + +Example use: + +>>> s = b'abc' +>>> assert isinstance(s, basestring) +>>> from past.types import str as oldstr +>>> s2 = oldstr(b'abc') +>>> assert isinstance(s2, basestring) + +""" + +import sys + +from past.utils import with_metaclass, PY2 + +if PY2: + str = unicode + +ver = sys.version_info[:2] + + +class BaseBaseString(type): + def __instancecheck__(cls, instance): + return isinstance(instance, (bytes, str)) + + def __subclasscheck__(cls, subclass): + return super(BaseBaseString, cls).__subclasscheck__(subclass) or issubclass(subclass, (bytes, str)) + + +class basestring(with_metaclass(BaseBaseString)): + """ + A minimal backport of the Python 2 basestring type to Py3 + """ + + +__all__ = ['basestring'] diff --git a/src/past/types/olddict.py b/src/past/types/olddict.py new file mode 100644 index 00000000..f4f92a26 --- /dev/null +++ b/src/past/types/olddict.py @@ -0,0 +1,96 @@ +""" +A dict subclass for Python 3 that behaves like Python 2's dict + +Example use: + +>>> from past.builtins import dict +>>> d1 = dict() # instead of {} for an empty dict +>>> d2 
= dict(key1='value1', key2='value2') + +The keys, values and items methods now return lists on Python 3.x and there are +methods for iterkeys, itervalues, iteritems, and viewkeys etc. + +>>> for d in (d1, d2): +... assert isinstance(d.keys(), list) +... assert isinstance(d.values(), list) +... assert isinstance(d.items(), list) +""" + +import sys + +from past.utils import with_metaclass + + +_builtin_dict = dict +ver = sys.version_info[:2] + + +class BaseOldDict(type): + def __instancecheck__(cls, instance): + return isinstance(instance, _builtin_dict) + + +class olddict(with_metaclass(BaseOldDict, _builtin_dict)): + """ + A backport of the Python 3 dict object to Py2 + """ + iterkeys = _builtin_dict.keys + viewkeys = _builtin_dict.keys + + def keys(self): + return list(super(olddict, self).keys()) + + itervalues = _builtin_dict.values + viewvalues = _builtin_dict.values + + def values(self): + return list(super(olddict, self).values()) + + iteritems = _builtin_dict.items + viewitems = _builtin_dict.items + + def items(self): + return list(super(olddict, self).items()) + + def has_key(self, k): + """ + D.has_key(k) -> True if D has a key k, else False + """ + return k in self + + # def __new__(cls, *args, **kwargs): + # """ + # dict() -> new empty dictionary + # dict(mapping) -> new dictionary initialized from a mapping object's + # (key, value) pairs + # dict(iterable) -> new dictionary initialized as if via: + # d = {} + # for k, v in iterable: + # d[k] = v + # dict(**kwargs) -> new dictionary initialized with the name=value pairs + # in the keyword argument list. For example: dict(one=1, two=2) + + # """ + # + # if len(args) == 0: + # return super(olddict, cls).__new__(cls) + # # Was: elif isinstance(args[0], newbytes): + # # We use type() instead of the above because we're redefining + # # this to be True for all unicode string subclasses. Warning: + # # This may render newstr un-subclassable. 
+ # elif type(args[0]) == olddict: + # return args[0] + # # elif isinstance(args[0], _builtin_dict): + # # value = args[0] + # else: + # value = args[0] + # return super(olddict, cls).__new__(cls, value) + + def __native__(self): + """ + Hook for the past.utils.native() function + """ + return super(oldbytes, self) + + +__all__ = ['olddict'] diff --git a/src/past/types/oldstr.py b/src/past/types/oldstr.py new file mode 100644 index 00000000..5a0e3789 --- /dev/null +++ b/src/past/types/oldstr.py @@ -0,0 +1,135 @@ +""" +Pure-Python implementation of a Python 2-like str object for Python 3. +""" + +from numbers import Integral + +from past.utils import PY2, with_metaclass + +if PY2: + from collections import Iterable +else: + from collections.abc import Iterable + +_builtin_bytes = bytes + + +class BaseOldStr(type): + def __instancecheck__(cls, instance): + return isinstance(instance, _builtin_bytes) + + +def unescape(s): + r""" + Interprets strings with escape sequences + + Example: + >>> s = unescape(r'abc\\def') # i.e. 
'abc\\\\def' + >>> print(s) + 'abc\def' + >>> s2 = unescape('abc\\ndef') + >>> len(s2) + 8 + >>> print(s2) + abc + def + """ + return s.encode().decode('unicode_escape') + + +class oldstr(with_metaclass(BaseOldStr, _builtin_bytes)): + """ + A forward port of the Python 2 8-bit string object to Py3 + """ + # Python 2 strings have no __iter__ method: + @property + def __iter__(self): + raise AttributeError + + def __dir__(self): + return [thing for thing in dir(_builtin_bytes) if thing != '__iter__'] + + # def __new__(cls, *args, **kwargs): + # """ + # From the Py3 bytes docstring: + + # bytes(iterable_of_ints) -> bytes + # bytes(string, encoding[, errors]) -> bytes + # bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer + # bytes(int) -> bytes object of size given by the parameter initialized with null bytes + # bytes() -> empty bytes object + # + # Construct an immutable array of bytes from: + # - an iterable yielding integers in range(256) + # - a text string encoded using the specified encoding + # - any object implementing the buffer API. + # - an integer + # """ + # + # if len(args) == 0: + # return super(newbytes, cls).__new__(cls) + # # Was: elif isinstance(args[0], newbytes): + # # We use type() instead of the above because we're redefining + # # this to be True for all unicode string subclasses. Warning: + # # This may render newstr un-subclassable. 
+ # elif type(args[0]) == newbytes: + # return args[0] + # elif isinstance(args[0], _builtin_bytes): + # value = args[0] + # elif isinstance(args[0], unicode): + # if 'encoding' not in kwargs: + # raise TypeError('unicode string argument without an encoding') + # ### + # # Was: value = args[0].encode(**kwargs) + # # Python 2.6 string encode() method doesn't take kwargs: + # # Use this instead: + # newargs = [kwargs['encoding']] + # if 'errors' in kwargs: + # newargs.append(kwargs['errors']) + # value = args[0].encode(*newargs) + # ### + # elif isinstance(args[0], Iterable): + # if len(args[0]) == 0: + # # What is this? + # raise ValueError('unknown argument type') + # elif len(args[0]) > 0 and isinstance(args[0][0], Integral): + # # It's a list of integers + # value = b''.join([chr(x) for x in args[0]]) + # else: + # raise ValueError('item cannot be interpreted as an integer') + # elif isinstance(args[0], Integral): + # if args[0] < 0: + # raise ValueError('negative count') + # value = b'\x00' * args[0] + # else: + # value = args[0] + # return super(newbytes, cls).__new__(cls, value) + + def __repr__(self): + s = super(oldstr, self).__repr__() # e.g. b'abc' on Py3, b'abc' on Py3 + return s[1:] + + def __str__(self): + s = super(oldstr, self).__str__() # e.g. "b'abc'" or "b'abc\\ndef' + # TODO: fix this: + assert s[:2] == "b'" and s[-1] == "'" + return unescape(s[2:-1]) # e.g. 
'abc' or 'abc\ndef' + + def __getitem__(self, y): + if isinstance(y, Integral): + return super(oldstr, self).__getitem__(slice(y, y+1)) + else: + return super(oldstr, self).__getitem__(y) + + def __getslice__(self, *args): + return self.__getitem__(slice(*args)) + + def __contains__(self, key): + if isinstance(key, int): + return False + + def __native__(self): + return bytes(self) + + +__all__ = ['oldstr'] diff --git a/src/past/utils/__init__.py b/src/past/utils/__init__.py new file mode 100644 index 00000000..f6b2642d --- /dev/null +++ b/src/past/utils/__init__.py @@ -0,0 +1,97 @@ +""" +Various non-built-in utility functions and definitions for Py2 +compatibility in Py3. + +For example: + + >>> # The old_div() function behaves like Python 2's / operator + >>> # without "from __future__ import division" + >>> from past.utils import old_div + >>> old_div(3, 2) # like 3/2 in Py2 + 0 + >>> old_div(3, 2.0) # like 3/2.0 in Py2 + 1.5 +""" + +import sys +import numbers + +PY3 = sys.version_info[0] >= 3 +PY2 = sys.version_info[0] == 2 +PYPY = hasattr(sys, 'pypy_translation_info') + + +def with_metaclass(meta, *bases): + """ + Function from jinja2/_compat.py. License: BSD. + + Use it like this:: + + class BaseForm(object): + pass + + class FormType(type): + pass + + class Form(with_metaclass(FormType, BaseForm)): + pass + + This requires a bit of explanation: the basic idea is to make a + dummy metaclass for one level of class instantiation that replaces + itself with the actual metaclass. Because of internal type checks + we also need to make sure that we downgrade the custom metaclass + for one level to something closer to type (that's why __call__ and + __init__ comes back from type etc.). + + This has the advantage over six.with_metaclass of not introducing + dummy classes into the final MRO. 
+ """ + class metaclass(meta): + __call__ = type.__call__ + __init__ = type.__init__ + def __new__(cls, name, this_bases, d): + if this_bases is None: + return type.__new__(cls, name, (), d) + return meta(name, bases, d) + return metaclass('temporary_class', None, {}) + + +def native(obj): + """ + On Py2, this is a no-op: native(obj) -> obj + + On Py3, returns the corresponding native Py3 types that are + superclasses for forward-ported objects from Py2: + + >>> from past.builtins import str, dict + + >>> native(str(b'ABC')) # Output on Py3 follows. On Py2, output is 'ABC' + b'ABC' + >>> type(native(str(b'ABC'))) + bytes + + Existing native types on Py3 will be returned unchanged: + + >>> type(native(b'ABC')) + bytes + """ + if hasattr(obj, '__native__'): + return obj.__native__() + else: + return obj + + +# An alias for future.utils.old_div(): +def old_div(a, b): + """ + Equivalent to ``a / b`` on Python 2 without ``from __future__ import + division``. + + TODO: generalize this to other objects (like arrays etc.) + """ + if isinstance(a, numbers.Integral) and isinstance(b, numbers.Integral): + return a // b + else: + return a / b + +__all__ = ['PY3', 'PY2', 'PYPY', 'with_metaclass', 'native', 'old_div'] diff --git a/src/queue/__init__.py b/src/queue/__init__.py new file mode 100644 index 00000000..22bd296b --- /dev/null +++ b/src/queue/__init__.py @@ -0,0 +1,10 @@ +from __future__ import absolute_import +import sys +__future_module__ = True + +if sys.version_info[0] < 3: + from Queue import * +else: + raise ImportError('This package should not be accessible on Python 3. 
' + 'Either you are trying to run from the python-future src folder ' + 'or your installation of python-future is corrupted.') diff --git a/src/reprlib/__init__.py b/src/reprlib/__init__.py new file mode 100644 index 00000000..6ccf9c00 --- /dev/null +++ b/src/reprlib/__init__.py @@ -0,0 +1,9 @@ +from __future__ import absolute_import +import sys + +if sys.version_info[0] < 3: + from repr import * +else: + raise ImportError('This package should not be accessible on Python 3. ' + 'Either you are trying to run from the python-future src folder ' + 'or your installation of python-future is corrupted.') diff --git a/src/socketserver/__init__.py b/src/socketserver/__init__.py new file mode 100644 index 00000000..c5b8c9c2 --- /dev/null +++ b/src/socketserver/__init__.py @@ -0,0 +1,9 @@ +from __future__ import absolute_import +import sys + +if sys.version_info[0] < 3: + from SocketServer import * +else: + raise ImportError('This package should not be accessible on Python 3. ' + 'Either you are trying to run from the python-future src folder ' + 'or your installation of python-future is corrupted.') diff --git a/src/tkinter/__init__.py b/src/tkinter/__init__.py new file mode 100644 index 00000000..bb730c35 --- /dev/null +++ b/src/tkinter/__init__.py @@ -0,0 +1,28 @@ +from __future__ import absolute_import +import sys + +if sys.version_info[0] < 3: + from Tkinter import * + from Tkinter import (_cnfmerge, _default_root, _flatten, + _support_default_root, _test, + _tkinter, _setit) + + try: # >= 2.7.4 + from Tkinter import (_join) + except ImportError: + pass + + try: # >= 2.7.4 + from Tkinter import (_stringify) + except ImportError: + pass + + try: # >= 2.7.9 + from Tkinter import (_splitdict) + except ImportError: + pass + +else: + raise ImportError('This package should not be accessible on Python 3. 
' + 'Either you are trying to run from the python-future src folder ' + 'or your installation of python-future is corrupted.') diff --git a/src/tkinter/colorchooser.py b/src/tkinter/colorchooser.py new file mode 100644 index 00000000..6dde6e8d --- /dev/null +++ b/src/tkinter/colorchooser.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.colorchooser import * +else: + try: + from tkColorChooser import * + except ImportError: + raise ImportError('The tkColorChooser module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/src/tkinter/commondialog.py b/src/tkinter/commondialog.py new file mode 100644 index 00000000..eb7ae8d6 --- /dev/null +++ b/src/tkinter/commondialog.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.commondialog import * +else: + try: + from tkCommonDialog import * + except ImportError: + raise ImportError('The tkCommonDialog module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/src/tkinter/constants.py b/src/tkinter/constants.py new file mode 100644 index 00000000..ffe09815 --- /dev/null +++ b/src/tkinter/constants.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.constants import * +else: + try: + from Tkconstants import * + except ImportError: + raise ImportError('The Tkconstants module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/src/tkinter/dialog.py b/src/tkinter/dialog.py new file mode 100644 index 00000000..113370ca --- /dev/null +++ b/src/tkinter/dialog.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.dialog import * +else: + try: + from Dialog import * + except ImportError: + raise ImportError('The Dialog module is missing. 
Does your Py2 ' + 'installation include tkinter?') diff --git a/src/tkinter/dnd.py b/src/tkinter/dnd.py new file mode 100644 index 00000000..1ab43791 --- /dev/null +++ b/src/tkinter/dnd.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.dnd import * +else: + try: + from Tkdnd import * + except ImportError: + raise ImportError('The Tkdnd module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/src/tkinter/filedialog.py b/src/tkinter/filedialog.py new file mode 100644 index 00000000..93a15388 --- /dev/null +++ b/src/tkinter/filedialog.py @@ -0,0 +1,17 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.filedialog import * +else: + try: + from FileDialog import * + except ImportError: + raise ImportError('The FileDialog module is missing. Does your Py2 ' + 'installation include tkinter?') + try: + from tkFileDialog import * + except ImportError: + raise ImportError('The tkFileDialog module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/src/tkinter/font.py b/src/tkinter/font.py new file mode 100644 index 00000000..628f399a --- /dev/null +++ b/src/tkinter/font.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.font import * +else: + try: + from tkFont import * + except ImportError: + raise ImportError('The tkFont module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/src/tkinter/messagebox.py b/src/tkinter/messagebox.py new file mode 100644 index 00000000..b43d8702 --- /dev/null +++ b/src/tkinter/messagebox.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.messagebox import * +else: + try: + from tkMessageBox import * + except ImportError: + raise ImportError('The tkMessageBox module is missing. 
Does your Py2 ' + 'installation include tkinter?') diff --git a/src/tkinter/scrolledtext.py b/src/tkinter/scrolledtext.py new file mode 100644 index 00000000..1c69db60 --- /dev/null +++ b/src/tkinter/scrolledtext.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.scrolledtext import * +else: + try: + from ScrolledText import * + except ImportError: + raise ImportError('The ScrolledText module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/src/tkinter/simpledialog.py b/src/tkinter/simpledialog.py new file mode 100644 index 00000000..dba93fbf --- /dev/null +++ b/src/tkinter/simpledialog.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.simpledialog import * +else: + try: + from SimpleDialog import * + except ImportError: + raise ImportError('The SimpleDialog module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/src/tkinter/tix.py b/src/tkinter/tix.py new file mode 100644 index 00000000..8d1718ad --- /dev/null +++ b/src/tkinter/tix.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.tix import * +else: + try: + from Tix import * + except ImportError: + raise ImportError('The Tix module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/src/tkinter/ttk.py b/src/tkinter/ttk.py new file mode 100644 index 00000000..081c1b49 --- /dev/null +++ b/src/tkinter/ttk.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.ttk import * +else: + try: + from ttk import * + except ImportError: + raise ImportError('The ttk module is missing. 
Does your Py2 ' + 'installation include tkinter?') diff --git a/src/winreg/__init__.py b/src/winreg/__init__.py new file mode 100644 index 00000000..97243bbb --- /dev/null +++ b/src/winreg/__init__.py @@ -0,0 +1,10 @@ +from __future__ import absolute_import +import sys +__future_module__ = True + +if sys.version_info[0] < 3: + from _winreg import * +else: + raise ImportError('This package should not be accessible on Python 3. ' + 'Either you are trying to run from the python-future src folder ' + 'or your installation of python-future is corrupted.') diff --git a/src/xmlrpc/__init__.py b/src/xmlrpc/__init__.py new file mode 100644 index 00000000..e4f853e5 --- /dev/null +++ b/src/xmlrpc/__init__.py @@ -0,0 +1,9 @@ +from __future__ import absolute_import +import sys + +if sys.version_info[0] < 3: + pass +else: + raise ImportError('This package should not be accessible on Python 3. ' + 'Either you are trying to run from the python-future src folder ' + 'or your installation of python-future is corrupted.') diff --git a/src/xmlrpc/client.py b/src/xmlrpc/client.py new file mode 100644 index 00000000..a8d0827e --- /dev/null +++ b/src/xmlrpc/client.py @@ -0,0 +1,5 @@ +from __future__ import absolute_import +import sys + +assert sys.version_info[0] < 3 +from xmlrpclib import * diff --git a/src/xmlrpc/server.py b/src/xmlrpc/server.py new file mode 100644 index 00000000..a8d0827e --- /dev/null +++ b/src/xmlrpc/server.py @@ -0,0 +1,5 @@ +from __future__ import absolute_import +import sys + +assert sys.version_info[0] < 3 +from xmlrpclib import * diff --git a/test.sh b/test.sh new file mode 100755 index 00000000..d45e98d3 --- /dev/null +++ b/test.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +set -exo pipefail + +python --version + +if [ -e "/root/pip" ] +then + pip install /root/pip/*.zip /root/pip/*.whl /root/pip/*tar.gz +else + pip install pytest unittest2 +fi + +pytag="py${PYTHON_VERSION//./}" + +python setup.py bdist_wheel --python-tag="${pytag}" +pip install 
dist/future-*-${pytag}-none-any.whl +pytest tests/ diff --git a/tests/test_future/__init__.py b/tests/test_future/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_future/test_backports.py b/tests/test_future/test_backports.py new file mode 100644 index 00000000..5d46b115 --- /dev/null +++ b/tests/test_future/test_backports.py @@ -0,0 +1,675 @@ +# -*- coding: utf-8 -*- +""" +Tests for various backported functions and classes in ``future.backports`` +""" + +from __future__ import absolute_import, print_function + +import sys +import copy +import inspect +import pickle +from random import randrange, shuffle + +from future.backports.misc import (count, + _count, + OrderedDict, + Counter, + ChainMap, + _count_elements) +from future.utils import PY2, PY26 +from future.tests.base import unittest, skip26, expectedFailurePY27 + +if PY2: + from collections import Mapping, MutableMapping +else: + from collections.abc import Mapping, MutableMapping + + +class CountTest(unittest.TestCase): + """Test the count function.""" + + def _test_count_func(self, func): + self.assertEqual(next(func(1)), 1) + self.assertEqual(next(func(start=1)), 1) + + c = func() + self.assertEqual(next(c), 0) + self.assertEqual(next(c), 1) + self.assertEqual(next(c), 2) + c = func(1, 1) + self.assertEqual(next(c), 1) + self.assertEqual(next(c), 2) + c = func(step=1) + self.assertEqual(next(c), 0) + self.assertEqual(next(c), 1) + c = func(start=1, step=1) + self.assertEqual(next(c), 1) + self.assertEqual(next(c), 2) + + c = func(-1) + self.assertEqual(next(c), -1) + self.assertEqual(next(c), 0) + self.assertEqual(next(c), 1) + c = func(1, -1) + self.assertEqual(next(c), 1) + self.assertEqual(next(c), 0) + self.assertEqual(next(c), -1) + c = func(-1, -1) + self.assertEqual(next(c), -1) + self.assertEqual(next(c), -2) + self.assertEqual(next(c), -3) + + def test_count(self): + """Test the count function.""" + self._test_count_func(count) + + def test_own_count(self): + 
"""Test own count implementation.""" + if PY26: + self.assertIs(count, _count) + else: + self.assertNotEqual(count, _count) + self._test_count_func(_count) + + +################################################################################ +### ChainMap (helper class for configparser and the string module) +################################################################################ + +class TestChainMap(unittest.TestCase): + + def test_basics(self): + c = ChainMap() + c['a'] = 1 + c['b'] = 2 + d = c.new_child() + d['b'] = 20 + d['c'] = 30 + self.assertEqual(d.maps, [{'b':20, 'c':30}, {'a':1, 'b':2}]) # check internal state + self.assertEqual(sorted(d.items()), + sorted(dict(a=1, b=20, c=30).items())) # check items/iter/getitem + self.assertEqual(len(d), 3) # check len + for key in 'abc': # check contains + self.assertIn(key, d) + for k, v in dict(a=1, b=20, c=30, z=100).items(): # check get + self.assertEqual(d.get(k, 100), v) + + del d['b'] # unmask a value + self.assertEqual(d.maps, [{'c':30}, {'a':1, 'b':2}]) # check internal state + self.assertEqual(sorted(d.items()), + sorted(dict(a=1, b=2, c=30).items())) # check items/iter/getitem + self.assertEqual(len(d), 3) # check len + for key in 'abc': # check contains + self.assertIn(key, d) + for k, v in dict(a=1, b=2, c=30, z=100).items(): # check get + self.assertEqual(d.get(k, 100), v) + self.assertIn(repr(d), [ # check repr + type(d).__name__ + "({'c': 30}, {'a': 1, 'b': 2})", + type(d).__name__ + "({'c': 30}, {'b': 2, 'a': 1})" + ]) + + for e in d.copy(), copy.copy(d): # check shallow copies + self.assertEqual(d, e) + self.assertEqual(d.maps, e.maps) + self.assertIsNot(d, e) + self.assertIsNot(d.maps[0], e.maps[0]) + for m1, m2 in zip(d.maps[1:], e.maps[1:]): + self.assertIs(m1, m2) + + _ChainMap = ChainMap + + for e in [pickle.loads(pickle.dumps(d)), + copy.deepcopy(d), + eval(repr(d)) + ]: # check deep copies + self.assertEqual(d, e) + self.assertEqual(d.maps, e.maps) + self.assertIsNot(d, e) + for m1, 
m2 in zip(d.maps, e.maps): + self.assertIsNot(m1, m2, e) + + f = d.new_child() + f['b'] = 5 + self.assertEqual(f.maps, [{'b': 5}, {'c':30}, {'a':1, 'b':2}]) + self.assertEqual(f.parents.maps, [{'c':30}, {'a':1, 'b':2}]) # check parents + self.assertEqual(f['b'], 5) # find first in chain + self.assertEqual(f.parents['b'], 2) # look beyond maps[0] + + def test_contructor(self): + self.assertEqual(ChainMap().maps, [{}]) # no-args --> one new dict + self.assertEqual(ChainMap({1:2}).maps, [{1:2}]) # 1 arg --> list + + def test_bool(self): + self.assertFalse(ChainMap()) + self.assertFalse(ChainMap({}, {})) + self.assertTrue(ChainMap({1:2}, {})) + self.assertTrue(ChainMap({}, {1:2})) + + def test_missing(self): + class DefaultChainMap(ChainMap): + def __missing__(self, key): + return 999 + d = DefaultChainMap(dict(a=1, b=2), dict(b=20, c=30)) + for k, v in dict(a=1, b=2, c=30, d=999).items(): + self.assertEqual(d[k], v) # check __getitem__ w/missing + for k, v in dict(a=1, b=2, c=30, d=77).items(): + self.assertEqual(d.get(k, 77), v) # check get() w/ missing + for k, v in dict(a=True, b=True, c=True, d=False).items(): + self.assertEqual(k in d, v) # check __contains__ w/missing + self.assertEqual(d.pop('a', 1001), 1, d) + self.assertEqual(d.pop('a', 1002), 1002) # check pop() w/missing + self.assertEqual(d.popitem(), ('b', 2)) # check popitem() w/missing + with self.assertRaises(KeyError): + d.popitem() + + def test_dict_coercion(self): + d = ChainMap(dict(a=1, b=2), dict(b=20, c=30)) + self.assertEqual(dict(d), dict(a=1, b=2, c=30)) + self.assertEqual(dict(d.items()), dict(a=1, b=2, c=30)) + + +################################################################################ +### Counter +################################################################################ + +class CounterSubclassWithSetItem(Counter): + # Test a counter subclass that overrides __setitem__ + def __init__(self, *args, **kwds): + self.called = False + Counter.__init__(self, *args, **kwds) + def 
__setitem__(self, key, value): + self.called = True + Counter.__setitem__(self, key, value) + +class CounterSubclassWithGet(Counter): + # Test a counter subclass that overrides get() + def __init__(self, *args, **kwds): + self.called = False + Counter.__init__(self, *args, **kwds) + def get(self, key, default): + self.called = True + return Counter.get(self, key, default) + +class TestCounter(unittest.TestCase): + + def test_basics(self): + c = Counter('abcaba') + self.assertEqual(c, Counter({'a':3 , 'b': 2, 'c': 1})) + self.assertEqual(c, Counter(a=3, b=2, c=1)) + self.assertIsInstance(c, dict) + self.assertIsInstance(c, Mapping) + self.assertTrue(issubclass(Counter, dict)) + self.assertTrue(issubclass(Counter, Mapping)) + self.assertEqual(len(c), 3) + self.assertEqual(sum(c.values()), 6) + self.assertEqual(sorted(c.values()), [1, 2, 3]) + self.assertEqual(sorted(c.keys()), ['a', 'b', 'c']) + self.assertEqual(sorted(c), ['a', 'b', 'c']) + self.assertEqual(sorted(c.items()), + [('a', 3), ('b', 2), ('c', 1)]) + self.assertEqual(c['b'], 2) + self.assertEqual(c['z'], 0) + self.assertEqual(c.__contains__('c'), True) + self.assertEqual(c.__contains__('z'), False) + self.assertEqual(c.get('b', 10), 2) + self.assertEqual(c.get('z', 10), 10) + self.assertEqual(c, dict(a=3, b=2, c=1)) + self.assertEqual(repr(c), "Counter({'a': 3, 'b': 2, 'c': 1})") + self.assertEqual(c.most_common(), [('a', 3), ('b', 2), ('c', 1)]) + for i in range(5): + self.assertEqual(c.most_common(i), + [('a', 3), ('b', 2), ('c', 1)][:i]) + self.assertEqual(''.join(sorted(c.elements())), 'aaabbc') + c['a'] += 1 # increment an existing value + c['b'] -= 2 # sub existing value to zero + del c['c'] # remove an entry + del c['c'] # make sure that del doesn't raise KeyError + c['d'] -= 2 # sub from a missing value + c['e'] = -5 # directly assign a missing value + c['f'] += 4 # add to a missing value + self.assertEqual(c, dict(a=4, b=0, d=-2, e=-5, f=4)) + self.assertEqual(''.join(sorted(c.elements())), 
'aaaaffff') + self.assertEqual(c.pop('f'), 4) + self.assertNotIn('f', c) + for i in range(3): + elem, cnt = c.popitem() + self.assertNotIn(elem, c) + c.clear() + self.assertEqual(c, {}) + self.assertEqual(repr(c), 'Counter()') + self.assertRaises(NotImplementedError, Counter.fromkeys, 'abc') + self.assertRaises(TypeError, hash, c) + c.update(dict(a=5, b=3)) + c.update(c=1) + c.update(Counter('a' * 50 + 'b' * 30)) + c.update() # test case with no args + c.__init__('a' * 500 + 'b' * 300) + c.__init__('cdc') + c.__init__() + self.assertEqual(c, dict(a=555, b=333, c=3, d=1)) + self.assertEqual(c.setdefault('d', 5), 1) + self.assertEqual(c['d'], 1) + self.assertEqual(c.setdefault('e', 5), 5) + self.assertEqual(c['e'], 5) + + def test_copying(self): + # Check that counters are copyable, deepcopyable, picklable, and + #have a repr/eval round-trip + words = Counter('which witch had which witches wrist watch'.split()) + update_test = Counter() + update_test.update(words) + for i, dup in enumerate([ + words.copy(), + copy.copy(words), + copy.deepcopy(words), + pickle.loads(pickle.dumps(words, 0)), + pickle.loads(pickle.dumps(words, 1)), + pickle.loads(pickle.dumps(words, 2)), + pickle.loads(pickle.dumps(words, -1)), + eval(repr(words)), + update_test, + Counter(words), + ]): + msg = (i, dup, words) + self.assertTrue(dup is not words) + self.assertEqual(dup, words) + self.assertEqual(len(dup), len(words)) + self.assertEqual(type(dup), type(words)) + + def test_copy_subclass(self): + class MyCounter(Counter): + pass + c = MyCounter('slartibartfast') + d = c.copy() + self.assertEqual(d, c) + self.assertEqual(len(d), len(c)) + self.assertEqual(type(d), type(c)) + + def test_conversions(self): + # Convert to: set, list, dict + s = 'she sells sea shells by the sea shore' + self.assertEqual(sorted(Counter(s).elements()), sorted(s)) + self.assertEqual(sorted(Counter(s)), sorted(set(s))) + self.assertEqual(dict(Counter(s)), dict(Counter(s).items())) + 
self.assertEqual(set(Counter(s)), set(s)) + + def test_invariant_for_the_in_operator(self): + c = Counter(a=10, b=-2, c=0) + for elem in c: + self.assertTrue(elem in c) + self.assertIn(elem, c) + + def test_multiset_operations(self): + # Verify that adding a zero counter will strip zeros and negatives + c = Counter(a=10, b=-2, c=0) + Counter() + self.assertEqual(dict(c), dict(a=10)) + + elements = 'abcd' + for i in range(1000): + # test random pairs of multisets + p = Counter(dict((elem, randrange(-2,4)) for elem in elements)) + p.update(e=1, f=-1, g=0) + q = Counter(dict((elem, randrange(-2,4)) for elem in elements)) + q.update(h=1, i=-1, j=0) + for counterop, numberop in [ + (Counter.__add__, lambda x, y: max(0, x+y)), + (Counter.__sub__, lambda x, y: max(0, x-y)), + (Counter.__or__, lambda x, y: max(0,x,y)), + (Counter.__and__, lambda x, y: max(0, min(x,y))), + ]: + result = counterop(p, q) + for x in elements: + self.assertEqual(numberop(p[x], q[x]), result[x], + (counterop, x, p, q)) + # verify that results exclude non-positive counts + self.assertTrue(x>0 for x in result.values()) + + elements = 'abcdef' + for i in range(100): + # verify that random multisets with no repeats are exactly like sets + p = Counter(dict((elem, randrange(0, 2)) for elem in elements)) + q = Counter(dict((elem, randrange(0, 2)) for elem in elements)) + for counterop, setop in [ + (Counter.__sub__, set.__sub__), + (Counter.__or__, set.__or__), + (Counter.__and__, set.__and__), + ]: + counter_result = counterop(p, q) + set_result = setop(set(p.elements()), set(q.elements())) + self.assertEqual(counter_result, dict.fromkeys(set_result, 1)) + + @expectedFailurePY27 + def test_inplace_operations(self): + elements = 'abcd' + for i in range(1000): + # test random pairs of multisets + p = Counter(dict((elem, randrange(-2,4)) for elem in elements)) + p.update(e=1, f=-1, g=0) + q = Counter(dict((elem, randrange(-2,4)) for elem in elements)) + q.update(h=1, i=-1, j=0) + for inplace_op, 
regular_op in [ + (Counter.__iadd__, Counter.__add__), + (Counter.__isub__, Counter.__sub__), + (Counter.__ior__, Counter.__or__), + (Counter.__iand__, Counter.__and__), + ]: + c = p.copy() + c_id = id(c) + regular_result = regular_op(c, q) + inplace_result = inplace_op(c, q) + self.assertEqual(inplace_result, regular_result) + self.assertEqual(id(inplace_result), c_id) + + def test_subtract(self): + c = Counter(a=-5, b=0, c=5, d=10, e=15,g=40) + c.subtract(a=1, b=2, c=-3, d=10, e=20, f=30, h=-50) + self.assertEqual(c, Counter(a=-6, b=-2, c=8, d=0, e=-5, f=-30, g=40, h=50)) + c = Counter(a=-5, b=0, c=5, d=10, e=15,g=40) + c.subtract(Counter(a=1, b=2, c=-3, d=10, e=20, f=30, h=-50)) + self.assertEqual(c, Counter(a=-6, b=-2, c=8, d=0, e=-5, f=-30, g=40, h=50)) + c = Counter('aaabbcd') + c.subtract('aaaabbcce') + self.assertEqual(c, Counter(a=-1, b=0, c=-1, d=1, e=-1)) + + @expectedFailurePY27 + def test_unary(self): + c = Counter(a=-5, b=0, c=5, d=10, e=15,g=40) + self.assertEqual(dict(+c), dict(c=5, d=10, e=15, g=40)) + self.assertEqual(dict(-c), dict(a=5)) + + def test_repr_nonsortable(self): + c = Counter(a=2, b=None) + r = repr(c) + self.assertIn("'a': 2", r) + self.assertIn("'b': None", r) + + def test_helper_function(self): + # two paths, one for real dicts and one for other mappings + elems = list('abracadabra') + + d = dict() + _count_elements(d, elems) + self.assertEqual(d, {'a': 5, 'r': 2, 'b': 2, 'c': 1, 'd': 1}) + + m = OrderedDict() + _count_elements(m, elems) + self.assertEqual(m, + OrderedDict([('a', 5), ('b', 2), ('r', 2), ('c', 1), ('d', 1)])) + + # test fidelity to the pure python version + c = CounterSubclassWithSetItem('abracadabra') + self.assertTrue(c.called) + c = CounterSubclassWithGet('abracadabra') + self.assertTrue(c.called) + + +################################################################################ +### OrderedDict +################################################################################ + +class 
TestOrderedDict(unittest.TestCase): + + def test_init(self): + with self.assertRaises(TypeError): + OrderedDict([('a', 1), ('b', 2)], None) # too many args + pairs = [('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e', 5)] + self.assertEqual(sorted(OrderedDict(dict(pairs)).items()), pairs) # dict input + self.assertEqual(sorted(OrderedDict(**dict(pairs)).items()), pairs) # kwds input + self.assertEqual(list(OrderedDict(pairs).items()), pairs) # pairs input + self.assertEqual(list(OrderedDict([('a', 1), ('b', 2), ('c', 9), ('d', 4)], + c=3, e=5).items()), pairs) # mixed input + + # Make sure that direct calls to __init__ do not clear previous contents + d = OrderedDict([('a', 1), ('b', 2), ('c', 3), ('d', 44), ('e', 55)]) + d.__init__([('e', 5), ('f', 6)], g=7, d=4) + self.assertEqual(list(d.items()), + [('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e', 5), ('f', 6), ('g', 7)]) + + def test_update(self): + with self.assertRaises(TypeError): + OrderedDict().update([('a', 1), ('b', 2)], None) # too many args + pairs = [('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e', 5)] + od = OrderedDict() + od.update(dict(pairs)) + self.assertEqual(sorted(od.items()), pairs) # dict input + od = OrderedDict() + od.update(**dict(pairs)) + self.assertEqual(sorted(od.items()), pairs) # kwds input + od = OrderedDict() + od.update(pairs) + self.assertEqual(list(od.items()), pairs) # pairs input + od = OrderedDict() + od.update([('a', 1), ('b', 2), ('c', 9), ('d', 4)], c=3, e=5) + self.assertEqual(list(od.items()), pairs) # mixed input + + ### The tests below fail on Py2.6 + if PY26: + return + # Issue 9137: Named argument called 'other' or 'self' + # shouldn't be treated specially. 
+ od = OrderedDict() + od.update(self=23) + self.assertEqual(list(od.items()), [('self', 23)]) + od = OrderedDict() + od.update(other={}) + self.assertEqual(list(od.items()), [('other', {})]) + od = OrderedDict() + od.update(red=5, blue=6, other=7, self=8) + self.assertEqual(sorted(list(od.items())), + [('blue', 6), ('other', 7), ('red', 5), ('self', 8)]) + + # Make sure that direct calls to update do not clear previous contents + # add that updates items are not moved to the end + d = OrderedDict([('a', 1), ('b', 2), ('c', 3), ('d', 44), ('e', 55)]) + d.update([('e', 5), ('f', 6)], g=7, d=4) + self.assertEqual(list(d.items()), + [('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e', 5), ('f', 6), ('g', 7)]) + + def test_abc(self): + self.assertIsInstance(OrderedDict(), MutableMapping) + self.assertTrue(issubclass(OrderedDict, MutableMapping)) + + def test_clear(self): + pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)] + shuffle(pairs) + od = OrderedDict(pairs) + self.assertEqual(len(od), len(pairs)) + od.clear() + self.assertEqual(len(od), 0) + + def test_delitem(self): + pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)] + od = OrderedDict(pairs) + del od['a'] + self.assertNotIn('a', od) + with self.assertRaises(KeyError): + del od['a'] + self.assertEqual(list(od.items()), pairs[:2] + pairs[3:]) + + def test_setitem(self): + od = OrderedDict([('d', 1), ('b', 2), ('c', 3), ('a', 4), ('e', 5)]) + od['c'] = 10 # existing element + od['f'] = 20 # new element + self.assertEqual(list(od.items()), + [('d', 1), ('b', 2), ('c', 10), ('a', 4), ('e', 5), ('f', 20)]) + + def test_iterators(self): + pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)] + shuffle(pairs) + od = OrderedDict(pairs) + self.assertEqual(list(od), [t[0] for t in pairs]) + self.assertEqual(list(od.keys()), [t[0] for t in pairs]) + self.assertEqual(list(od.values()), [t[1] for t in pairs]) + self.assertEqual(list(od.items()), pairs) + 
self.assertEqual(list(reversed(od)), + [t[0] for t in reversed(pairs)]) + + def test_popitem(self): + pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)] + shuffle(pairs) + od = OrderedDict(pairs) + while pairs: + self.assertEqual(od.popitem(), pairs.pop()) + with self.assertRaises(KeyError): + od.popitem() + self.assertEqual(len(od), 0) + + def test_pop(self): + pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)] + shuffle(pairs) + od = OrderedDict(pairs) + shuffle(pairs) + while pairs: + k, v = pairs.pop() + self.assertEqual(od.pop(k), v) + with self.assertRaises(KeyError): + od.pop('xyz') + self.assertEqual(len(od), 0) + self.assertEqual(od.pop(k, 12345), 12345) + + # make sure pop still works when __missing__ is defined + class Missing(OrderedDict): + def __missing__(self, key): + return 0 + m = Missing(a=1) + self.assertEqual(m.pop('b', 5), 5) + self.assertEqual(m.pop('a', 6), 1) + self.assertEqual(m.pop('a', 6), 6) + with self.assertRaises(KeyError): + m.pop('a') + + def test_equality(self): + pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)] + shuffle(pairs) + od1 = OrderedDict(pairs) + od2 = OrderedDict(pairs) + self.assertEqual(od1, od2) # same order implies equality + pairs = pairs[2:] + pairs[:2] + od2 = OrderedDict(pairs) + self.assertNotEqual(od1, od2) # different order implies inequality + # comparison to regular dict is not order sensitive + self.assertEqual(od1, dict(od2)) + self.assertEqual(dict(od2), od1) + # different length implied inequality + self.assertNotEqual(od1, OrderedDict(pairs[:-1])) + + def test_copying(self): + # Check that ordered dicts are copyable, deepcopyable, picklable, + # and have a repr/eval round-trip + pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)] + od = OrderedDict(pairs) + update_test = OrderedDict() + update_test.update(od) + for i, dup in enumerate([ + od.copy(), + copy.copy(od), + copy.deepcopy(od), + pickle.loads(pickle.dumps(od, 0)), + 
pickle.loads(pickle.dumps(od, 1)), + pickle.loads(pickle.dumps(od, 2)), + # pickle.loads(pickle.dumps(od, 3)), + pickle.loads(pickle.dumps(od, -1)), + eval(repr(od)), + update_test, + OrderedDict(od), + ]): + self.assertTrue(dup is not od) + self.assertEqual(dup, od) + self.assertEqual(list(dup.items()), list(od.items())) + self.assertEqual(len(dup), len(od)) + self.assertEqual(type(dup), type(od)) + + def test_yaml_linkage(self): + # Verify that __reduce__ is setup in a way that supports PyYAML's dump() feature. + # In yaml, lists are native but tuples are not. + pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)] + od = OrderedDict(pairs) + # yaml.dump(od) --> + # '!!python/object/apply:__main__.OrderedDict\n- - [a, 1]\n - [b, 2]\n' + self.assertTrue(all(type(pair)==list for pair in od.__reduce__()[1])) + + # def test_reduce_not_too_fat(self): + # # do not save instance dictionary if not needed + # pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)] + # od = OrderedDict(pairs) + # self.assertEqual(len(od.__reduce__()), 2) + # od.x = 10 + # self.assertEqual(len(od.__reduce__()), 3) + + def test_repr(self): + od = OrderedDict([('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)]) + if sys.version_info[0] == 3 and sys.version_info[1] >= 12: + self.assertEqual(repr(od), + "OrderedDict({'c': 1, 'b': 2, 'a': 3, 'd': 4, 'e': 5, 'f': 6})") + else: + self.assertEqual(repr(od), + "OrderedDict([('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)])") + self.assertEqual(eval(repr(od)), od) + self.assertEqual(repr(OrderedDict()), "OrderedDict()") + + def test_repr_recursive(self): + # See issue #9826 + od = OrderedDict.fromkeys('abc') + od['x'] = od + if sys.version_info[0] == 3 and sys.version_info[1] >= 12: + self.assertEqual(repr(od), + "OrderedDict({'a': None, 'b': None, 'c': None, 'x': ...})") + else: + self.assertEqual(repr(od), + "OrderedDict([('a', None), ('b', None), ('c', None), ('x', ...)])") + + def 
test_setdefault(self): + pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)] + shuffle(pairs) + od = OrderedDict(pairs) + pair_order = list(od.items()) + self.assertEqual(od.setdefault('a', 10), 3) + # make sure order didn't change + self.assertEqual(list(od.items()), pair_order) + self.assertEqual(od.setdefault('x', 10), 10) + # make sure 'x' is added to the end + self.assertEqual(list(od.items())[-1], ('x', 10)) + + # make sure setdefault still works when __missing__ is defined + class Missing(OrderedDict): + def __missing__(self, key): + return 0 + self.assertEqual(Missing().setdefault(5, 9), 9) + + def test_reinsert(self): + # Given insert a, insert b, delete a, re-insert a, + # verify that a is now later than b. + od = OrderedDict() + od['a'] = 1 + od['b'] = 2 + del od['a'] + od['a'] = 1 + self.assertEqual(list(od.items()), [('b', 2), ('a', 1)]) + + @expectedFailurePY27 + def test_move_to_end(self): + od = OrderedDict.fromkeys('abcde') + self.assertEqual(list(od), list('abcde')) + od.move_to_end('c') + self.assertEqual(list(od), list('abdec')) + od.move_to_end('c', 0) + self.assertEqual(list(od), list('cabde')) + od.move_to_end('c', 0) + self.assertEqual(list(od), list('cabde')) + od.move_to_end('e') + self.assertEqual(list(od), list('cabde')) + with self.assertRaises(KeyError): + od.move_to_end('x') + + def test_override_update(self): + # Verify that subclasses can override update() without breaking __init__() + class MyOD(OrderedDict): + def update(self, *args, **kwds): + raise Exception() + items = [('a', 1), ('c', 3), ('b', 2)] + self.assertEqual(list(MyOD(items).items()), items) + + +if __name__ == '__main__': + unittest.main() diff --git a/future/standard_library/test/buffer_tests.py b/tests/test_future/test_buffer.py similarity index 58% rename from future/standard_library/test/buffer_tests.py rename to tests/test_future/test_buffer.py index d85958c7..74cfb74e 100644 --- a/future/standard_library/test/buffer_tests.py +++ 
b/tests/test_future/test_buffer.py @@ -3,12 +3,13 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) -from future import standard_library from future.builtins import * +from future.tests.base import unittest, expectedFailurePY26 import struct import sys + class MixinBytesBufferCommonTests(object): """Tests that work for both bytes and buffer objects. See PEP 3137. @@ -94,86 +95,86 @@ def test_isdigit(self): self.assertRaises(TypeError, self.marshal(b'abc').isdigit, 42) def test_lower(self): - self.assertEqual(b'hello', self.marshal(b'HeLLo').lower()) - self.assertEqual(b'hello', self.marshal(b'hello').lower()) + self.assertEqual(bytes(b'hello'), self.marshal(b'HeLLo').lower()) + self.assertEqual(bytes(b'hello'), self.marshal(b'hello').lower()) self.assertRaises(TypeError, self.marshal(b'hello').lower, 42) def test_upper(self): - self.assertEqual(b'HELLO', self.marshal(b'HeLLo').upper()) - self.assertEqual(b'HELLO', self.marshal(b'HELLO').upper()) + self.assertEqual(bytes(b'HELLO'), self.marshal(b'HeLLo').upper()) + self.assertEqual(bytes(b'HELLO'), self.marshal(b'HELLO').upper()) self.assertRaises(TypeError, self.marshal(b'hello').upper, 42) def test_capitalize(self): - self.assertEqual(b' hello ', self.marshal(b' hello ').capitalize()) - self.assertEqual(b'Hello ', self.marshal(b'Hello ').capitalize()) - self.assertEqual(b'Hello ', self.marshal(b'hello ').capitalize()) - self.assertEqual(b'Aaaa', self.marshal(b'aaaa').capitalize()) - self.assertEqual(b'Aaaa', self.marshal(b'AaAa').capitalize()) + self.assertEqual(bytes(b' hello '), self.marshal(b' hello ').capitalize()) + self.assertEqual(bytes(b'Hello '), self.marshal(b'Hello ').capitalize()) + self.assertEqual(bytes(b'Hello '), self.marshal(b'hello ').capitalize()) + self.assertEqual(bytes(b'Aaaa'), self.marshal(b'aaaa').capitalize()) + self.assertEqual(bytes(b'Aaaa'), self.marshal(b'AaAa').capitalize()) self.assertRaises(TypeError, self.marshal(b'hello').capitalize, 42) def 
test_ljust(self): - self.assertEqual(b'abc ', self.marshal(b'abc').ljust(10)) - self.assertEqual(b'abc ', self.marshal(b'abc').ljust(6)) - self.assertEqual(b'abc', self.marshal(b'abc').ljust(3)) - self.assertEqual(b'abc', self.marshal(b'abc').ljust(2)) - self.assertEqual(b'abc*******', self.marshal(b'abc').ljust(10, b'*')) + self.assertEqual(bytes(b'abc '), self.marshal(b'abc').ljust(10)) + self.assertEqual(bytes(b'abc '), self.marshal(b'abc').ljust(6)) + self.assertEqual(bytes(b'abc'), self.marshal(b'abc').ljust(3)) + self.assertEqual(bytes(b'abc'), self.marshal(b'abc').ljust(2)) + self.assertEqual(bytes(b'abc*******'), self.marshal(b'abc').ljust(10, b'*')) self.assertRaises(TypeError, self.marshal(b'abc').ljust) def test_rjust(self): - self.assertEqual(b' abc', self.marshal(b'abc').rjust(10)) - self.assertEqual(b' abc', self.marshal(b'abc').rjust(6)) - self.assertEqual(b'abc', self.marshal(b'abc').rjust(3)) - self.assertEqual(b'abc', self.marshal(b'abc').rjust(2)) - self.assertEqual(b'*******abc', self.marshal(b'abc').rjust(10, b'*')) + self.assertEqual(bytes(b' abc'), self.marshal(b'abc').rjust(10)) + self.assertEqual(bytes(b' abc'), self.marshal(b'abc').rjust(6)) + self.assertEqual(bytes(b'abc'), self.marshal(b'abc').rjust(3)) + self.assertEqual(bytes(b'abc'), self.marshal(b'abc').rjust(2)) + self.assertEqual(bytes(b'*******abc'), self.marshal(b'abc').rjust(10, b'*')) self.assertRaises(TypeError, self.marshal(b'abc').rjust) def test_center(self): - self.assertEqual(b' abc ', self.marshal(b'abc').center(10)) - self.assertEqual(b' abc ', self.marshal(b'abc').center(6)) - self.assertEqual(b'abc', self.marshal(b'abc').center(3)) - self.assertEqual(b'abc', self.marshal(b'abc').center(2)) - self.assertEqual(b'***abc****', self.marshal(b'abc').center(10, b'*')) + self.assertEqual(bytes(b' abc '), self.marshal(b'abc').center(10)) + self.assertEqual(bytes(b' abc '), self.marshal(b'abc').center(6)) + self.assertEqual(bytes(b'abc'), self.marshal(b'abc').center(3)) + 
self.assertEqual(bytes(b'abc'), self.marshal(b'abc').center(2)) + self.assertEqual(bytes(b'***abc****'), self.marshal(b'abc').center(10, b'*')) self.assertRaises(TypeError, self.marshal(b'abc').center) def test_swapcase(self): - self.assertEqual(b'hEllO CoMPuTErS', - self.marshal(b'HeLLo cOmpUteRs').swapcase()) + self.assertEqual(bytes(b'hEllO CoMPuTErS'), + self.marshal(bytes(b'HeLLo cOmpUteRs')).swapcase()) self.assertRaises(TypeError, self.marshal(b'hello').swapcase, 42) def test_zfill(self): - self.assertEqual(b'123', self.marshal(b'123').zfill(2)) - self.assertEqual(b'123', self.marshal(b'123').zfill(3)) - self.assertEqual(b'0123', self.marshal(b'123').zfill(4)) - self.assertEqual(b'+123', self.marshal(b'+123').zfill(3)) - self.assertEqual(b'+123', self.marshal(b'+123').zfill(4)) - self.assertEqual(b'+0123', self.marshal(b'+123').zfill(5)) - self.assertEqual(b'-123', self.marshal(b'-123').zfill(3)) - self.assertEqual(b'-123', self.marshal(b'-123').zfill(4)) - self.assertEqual(b'-0123', self.marshal(b'-123').zfill(5)) - self.assertEqual(b'000', self.marshal(b'').zfill(3)) - self.assertEqual(b'34', self.marshal(b'34').zfill(1)) - self.assertEqual(b'0034', self.marshal(b'34').zfill(4)) + self.assertEqual(bytes(b'123'), self.marshal(b'123').zfill(2)) + self.assertEqual(bytes(b'123'), self.marshal(b'123').zfill(3)) + self.assertEqual(bytes(b'0123'), self.marshal(b'123').zfill(4)) + self.assertEqual(bytes(b'+123'), self.marshal(b'+123').zfill(3)) + self.assertEqual(bytes(b'+123'), self.marshal(b'+123').zfill(4)) + self.assertEqual(bytes(b'+0123'), self.marshal(b'+123').zfill(5)) + self.assertEqual(bytes(b'-123'), self.marshal(b'-123').zfill(3)) + self.assertEqual(bytes(b'-123'), self.marshal(b'-123').zfill(4)) + self.assertEqual(bytes(b'-0123'), self.marshal(b'-123').zfill(5)) + self.assertEqual(bytes(b'000'), self.marshal(b'').zfill(3)) + self.assertEqual(bytes(b'34'), self.marshal(b'34').zfill(1)) + self.assertEqual(bytes(b'0034'), self.marshal(b'34').zfill(4)) 
self.assertRaises(TypeError, self.marshal(b'123').zfill) def test_expandtabs(self): - self.assertEqual(b'abc\rab def\ng hi', + self.assertEqual(bytes(b'abc\rab def\ng hi'), self.marshal(b'abc\rab\tdef\ng\thi').expandtabs()) - self.assertEqual(b'abc\rab def\ng hi', + self.assertEqual(bytes(b'abc\rab def\ng hi'), self.marshal(b'abc\rab\tdef\ng\thi').expandtabs(8)) - self.assertEqual(b'abc\rab def\ng hi', + self.assertEqual(bytes(b'abc\rab def\ng hi'), self.marshal(b'abc\rab\tdef\ng\thi').expandtabs(4)) - self.assertEqual(b'abc\r\nab def\ng hi', + self.assertEqual(bytes(b'abc\r\nab def\ng hi'), self.marshal(b'abc\r\nab\tdef\ng\thi').expandtabs(4)) - self.assertEqual(b'abc\rab def\ng hi', + self.assertEqual(bytes(b'abc\rab def\ng hi'), self.marshal(b'abc\rab\tdef\ng\thi').expandtabs()) - self.assertEqual(b'abc\rab def\ng hi', + self.assertEqual(bytes(b'abc\rab def\ng hi'), self.marshal(b'abc\rab\tdef\ng\thi').expandtabs(8)) - self.assertEqual(b'abc\r\nab\r\ndef\ng\r\nhi', + self.assertEqual(bytes(b'abc\r\nab\r\ndef\ng\r\nhi'), self.marshal(b'abc\r\nab\r\ndef\ng\r\nhi').expandtabs(4)) - self.assertEqual(b' a\n b', self.marshal(b' \ta\n\tb').expandtabs(1)) + self.assertEqual(bytes(b' a\n b'), self.marshal(b' \ta\n\tb').expandtabs(1)) self.assertRaises(TypeError, self.marshal(b'hello').expandtabs, 42, 42) # This test is only valid when sizeof(int) == sizeof(void*) == 4. 
@@ -182,23 +183,24 @@ def test_expandtabs(self): self.marshal(b'\ta\n\tb').expandtabs, sys.maxsize) def test_title(self): - self.assertEqual(b' Hello ', self.marshal(b' hello ').title()) - self.assertEqual(b'Hello ', self.marshal(b'hello ').title()) - self.assertEqual(b'Hello ', self.marshal(b'Hello ').title()) - self.assertEqual(b'Format This As Title String', + self.assertEqual(bytes(b' Hello '), self.marshal(b' hello ').title()) + self.assertEqual(bytes(b'Hello '), self.marshal(b'hello ').title()) + self.assertEqual(bytes(b'Hello '), self.marshal(b'Hello ').title()) + self.assertEqual(bytes(b'Format This As Title String'), self.marshal(b'fOrMaT thIs aS titLe String').title()) - self.assertEqual(b'Format,This-As*Title;String', + self.assertEqual(bytes(b'Format,This-As*Title;String'), self.marshal(b'fOrMaT,thIs-aS*titLe;String').title()) - self.assertEqual(b'Getint', self.marshal(b'getInt').title()) + self.assertEqual(bytes(b'Getint'), self.marshal(b'getInt').title()) self.assertRaises(TypeError, self.marshal(b'hello').title, 42) def test_splitlines(self): - self.assertEqual([b'abc', b'def', b'', b'ghi'], + self.assertEqual([bytes(b'abc'), bytes(b'def'), bytes(b''), bytes(b'ghi')], self.marshal(b'abc\ndef\n\rghi').splitlines()) - self.assertEqual([b'abc', b'def', b'', b'ghi'], + self.assertEqual([bytes(b'abc'), bytes(b'def'), bytes(b''), bytes(b'ghi')], self.marshal(b'abc\ndef\n\r\nghi').splitlines()) - self.assertEqual([b'abc', b'def', b'ghi'], + self.assertEqual([bytes(b'abc'), bytes(b'def'), bytes(b'ghi')], self.marshal(b'abc\ndef\r\nghi').splitlines()) + # TODO: add bytes calls around these too ... 
self.assertEqual([b'abc', b'def', b'ghi'], self.marshal(b'abc\ndef\r\nghi\n').splitlines()) self.assertEqual([b'abc', b'def', b'ghi', b''], @@ -210,9 +212,40 @@ def test_splitlines(self): self.assertEqual([b'\n', b'abc\n', b'def\r\n', b'ghi\n', b'\r'], self.marshal(b'\nabc\ndef\r\nghi\n\r').splitlines(True)) self.assertEqual([b'', b'abc', b'def', b'ghi', b''], - self.marshal(b'\nabc\ndef\r\nghi\n\r').splitlines(keepends=False)) + self.marshal(b'\nabc\ndef\r\nghi\n\r').splitlines(False)) self.assertEqual([b'\n', b'abc\n', b'def\r\n', b'ghi\n', b'\r'], - self.marshal(b'\nabc\ndef\r\nghi\n\r').splitlines(keepends=True)) + self.marshal(b'\nabc\ndef\r\nghi\n\r').splitlines(True)) self.assertRaises(TypeError, self.marshal(b'abc').splitlines, 42, 42) + +# From Python-3.3.5/Lib/test/test_bytes.py: + +class BytearrayPEP3137Test(unittest.TestCase, + MixinBytesBufferCommonTests): + def marshal(self, x): + return bytearray(bytes(x)) + + @expectedFailurePY26 + def test_returns_new_copy(self): + val = self.marshal(b'1234') + # On immutable types these MAY return a reference to themselves + # but on mutable types like bytearray they MUST return a new copy. 
+ for methname in ('zfill', 'rjust', 'ljust', 'center'): + method = getattr(val, methname) + newval = method(3) + self.assertEqual(val, newval) + self.assertTrue(val is not newval, + methname+' returned self on a mutable object') + for expr in ('val.split()[0]', 'val.rsplit()[0]', + 'val.partition(b".")[0]', 'val.rpartition(b".")[2]', + 'val.splitlines()[0]', 'val.replace(b"", b"")'): + newval = eval(expr) + self.assertEqual(val, newval) + self.assertTrue(val is not newval, + expr+' returned val on a mutable object') + + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_future/test_builtins.py b/tests/test_future/test_builtins.py new file mode 100644 index 00000000..0da3fc2d --- /dev/null +++ b/tests/test_future/test_builtins.py @@ -0,0 +1,1881 @@ +# -*- coding: utf-8 -*- +""" +Tests to make sure the behaviour of the builtins is sensible and correct. +""" + +from __future__ import absolute_import, division, print_function, unicode_literals +from future.builtins import (bytes, dict, int, range, round, str, super, + ascii, chr, hex, input, next, oct, open, pow, + filter, map, zip, min, max) + +from future.utils import PY3, exec_, native_str, implements_iterator +from future.tests.base import (unittest, skip26, expectedFailurePY2, + expectedFailurePY26) + +import sys +import textwrap +import tempfile +import os +from subprocess import Popen, PIPE +from numbers import Integral +from decimal import Decimal + + +class TestBuiltins(unittest.TestCase): + def setUp(self): + self.tempdir = tempfile.mkdtemp() + os.path.sep + + def test_super(self): + class verbose_list(list): + ''' + A class that uses the new simpler super() function + ''' + def append(self, item): + print('Adding an item') + super().append(item) + + l = verbose_list() + l.append('blah') + self.assertEqual(l[0], 'blah') + self.assertEqual(len(l), 1) + self.assertTrue(isinstance(l, list)) + + def test_super_2(self): + """ + This occurs in the backported email/_header_value_parser.py + 
module and seems to fail. + """ + class Terminal(str): + def __new__(cls, value, token_type): + self = super().__new__(cls, value) + self.token_type = token_type + self.defects = [] + return self + + DOT = Terminal('.', 'dot') + + self.assertTrue(True) + + def test_isinstance_int(self): + """ + Redefining ``int`` to a ``long`` subclass on Py2 makes this + test fail unless __instancecheck__() is defined appropriately (or + isinstance is redefined, as we used to do ...) + """ + self.assertTrue(isinstance(0, int)) + self.assertTrue(isinstance(int(1), int)) + self.assertFalse(isinstance(1.0, int)) + + def test_isinstance_Integral(self): + """ + Tests the preferred alternative to the above + """ + self.assertTrue(isinstance(0, Integral)) + + def test_isinstance_long(self): + """ + Py2's long doesn't inherit from int! + """ + self.assertTrue(isinstance(10**100, int)) + self.assertTrue(isinstance(int(2**64), int)) + if not PY3: + self.assertTrue(isinstance(long(1), int)) + # Note: the following is a SyntaxError on Py3: + # self.assertTrue(isinstance(1L, int)) + + def test_isinstance_bytes(self): + self.assertTrue(isinstance(b'byte-string', bytes)) + self.assertFalse(isinstance(b'byte-string', str)) + + def test_isinstance_str(self): + self.assertTrue(isinstance('string', str)) + self.assertTrue(isinstance(u'string', str)) + self.assertFalse(isinstance(u'string', bytes)) + + @expectedFailurePY2 + def test_type(self): + """ + The following fails when passed a unicode string on Python + (including when unicode_literals is in effect) and fails when + passed a byte-string on Python 3. So type() always wants a native + string as the first argument. + + TODO: maybe provide a replacement that works identically on Py2/3? 
+ """ + mytype = type('blah', (dict,), {"old": 1, "new": 2}) + d = mytype() + self.assertTrue(isinstance(d, mytype)) + self.assertTrue(isinstance(d, dict)) + + def test_isinstance_tuple_of_types(self): + # These two should be equivalent, even if ``int`` is a special + # backported type. + label = 1 + self.assertTrue(isinstance(label, (float, Decimal)) or + isinstance(label, int)) + self.assertTrue(isinstance(label, (float, Decimal, int))) + self.assertTrue(isinstance(10**100, (float, Decimal, int))) + + self.assertTrue(isinstance(b'blah', (str, bytes))) + self.assertTrue(isinstance(b'blah', (bytes, float, int))) + + self.assertFalse(isinstance(b'blah', (str, Decimal, float, int))) + + self.assertTrue(isinstance('blah', (str, Decimal, float, int))) + self.assertTrue(isinstance(u'blah', (Decimal, float, int, str))) + + self.assertFalse(isinstance('blah', (bytes, Decimal, float, int))) + + def test_round(self): + """ + Note that the Python 2.x round() function fails these tests. The + Python 3.x round() function passes them, as should our custom + round() function. 
+ """ + self.assertEqual(round(0.1250, 2), 0.12) + self.assertEqual(round(0.1350, 2), 0.14) + self.assertEqual(round(0.1251, 2), 0.13) + self.assertEqual(round(0.125000001, 2), 0.13) + self.assertEqual(round(123.5, 0), 124.0) + self.assertEqual(round(123.5), 124) + self.assertEqual(round(12.35, 2), 12.35) + self.assertEqual(round(12.35, 1), 12.3) + self.assertEqual(round(12.35, 0), 12.0) + self.assertEqual(round(123.5, 1), 123.5) + + self.assertTrue(isinstance(round(123.5, 0), float)) + self.assertTrue(isinstance(round(123.5), Integral)) + + def test_round_negative_ndigits(self): + self.assertEqual(round(10.1350, 0), 10.0) + self.assertEqual(round(10.1350, -1), 10.0) + self.assertEqual(round(10.1350, -2), 0.0) + self.assertEqual(round(10.1350, -3), 0.0) + + self.assertEqual(round(12.35, -1), 10.0) + self.assertEqual(round(12.35, -2), 0.0) + self.assertEqual(round(123.5, -1), 120.0) + self.assertEqual(round(123.5, -2), 100.0) + self.assertEqual(round(123.551, -2), 100.0) + self.assertEqual(round(123.551, -3), 0.0) + + def test_newnext_doc_example(self): + # Python 3-style iterator: + class Upper(object): + def __init__(self, iterable): + self._iter = iter(iterable) + def __next__(self): # note the Py3 interface + return next(self._iter).upper() + def __iter__(self): + return self + + # from future.builtins import next + itr = Upper('hello') + self.assertEqual(next(itr), 'H') + self.assertEqual(next(itr), 'E') + # This doesn't work on Py2 because next() isn't defined: + # self.assertEqual(list(itr), 'LLO') + + # Check that regular Py2 iterators with just a .next method also work: + itr2 = iter(['one', 'three', 'five']) + self.assertEqual(next(itr2), 'one') + + +############################################################## +# Below here are the tests from Py3.3'2 test_builtin.py module +############################################################## + +from future.backports.test.support import TESTFN, unlink, run_unittest, check_warnings +import ast +import 
collections + +import io +import locale +import os +import pickle +import platform +import random +import sys +import traceback +import types +# Imported above more portably (using unittest2 on Py2.6): +import warnings +from operator import neg +try: + import pty, signal +except ImportError: + pty = signal = None + + +class Squares: + + def __init__(self, max): + self.max = max + self.sofar = [] + + def __len__(self): return len(self.sofar) + + def __getitem__(self, i): + if not 0 <= i < self.max: raise IndexError + n = len(self.sofar) + while n <= i: + self.sofar.append(n*n) + n += 1 + return self.sofar[i] + +class StrSquares: + + def __init__(self, max): + self.max = max + self.sofar = [] + + def __len__(self): + return len(self.sofar) + + def __getitem__(self, i): + if not 0 <= i < self.max: + raise IndexError + n = len(self.sofar) + while n <= i: + self.sofar.append(str(n*n)) + n += 1 + return self.sofar[i] + +class BitBucket: + def write(self, line): + pass + +test_conv_no_sign = [ + ('0', 0), + ('1', 1), + ('9', 9), + ('10', 10), + ('99', 99), + ('100', 100), + ('314', 314), + (' 314', 314), + ('314 ', 314), + (' \t\t 314 \t\t ', 314), + (repr(sys.maxsize), sys.maxsize), + (' 1x', ValueError), + (' 1 ', 1), + (' 1\02 ', ValueError), + ('', ValueError), + (' ', ValueError), + (' \t\t ', ValueError), + (str(b'\u0663\u0661\u0664 ','raw-unicode-escape'), 314), + (chr(0x200), ValueError), +] + +test_conv_sign = [ + ('0', 0), + ('1', 1), + ('9', 9), + ('10', 10), + ('99', 99), + ('100', 100), + ('314', 314), + (' 314', ValueError), + ('314 ', 314), + (' \t\t 314 \t\t ', ValueError), + (repr(sys.maxsize), sys.maxsize), + (' 1x', ValueError), + (' 1 ', ValueError), + (' 1\02 ', ValueError), + ('', ValueError), + (' ', ValueError), + (' \t\t ', ValueError), + (str(b'\u0663\u0661\u0664 ','raw-unicode-escape'), 314), + (chr(0x200), ValueError), +] + +class TestFailingBool: + def __bool__(self): + raise RuntimeError + # On Py2: + def __nonzero__(self): + raise 
RuntimeError + +class TestFailingIter: + def __iter__(self): + raise RuntimeError + +def filter_char(arg): + return ord(arg) > ord("d") + +def map_char(arg): + return chr(ord(arg)+1) + +class BuiltinTest(unittest.TestCase): + # Helper to check picklability + def check_iter_pickle(self, it, seq): + itorg = it + d = pickle.dumps(it) + it = pickle.loads(d) + self.assertEqual(type(itorg), type(it)) + self.assertEqual(list(it), seq) + + #test the iterator after dropping one from it + it = pickle.loads(d) + try: + next(it) + except StopIteration: + return + d = pickle.dumps(it) + it = pickle.loads(d) + self.assertEqual(list(it), seq[1:]) + + def test_import(self): + __import__('sys') + __import__('time') + __import__('string') + __import__(name='sys') + __import__(name='time', level=0) + self.assertRaises(ImportError, __import__, 'spamspam') + self.assertRaises(TypeError, __import__, 1, 2, 3, 4) + self.assertRaises(ValueError, __import__, '') + self.assertRaises(TypeError, __import__, 'sys', name='sys') + + def test_abs(self): + # int + self.assertEqual(abs(0), 0) + self.assertEqual(abs(1234), 1234) + self.assertEqual(abs(-1234), 1234) + self.assertTrue(abs(-sys.maxsize-1) > 0) + # float + self.assertEqual(abs(0.0), 0.0) + self.assertEqual(abs(3.14), 3.14) + self.assertEqual(abs(-3.14), 3.14) + # str + self.assertRaises(TypeError, abs, 'a') + # bool + self.assertEqual(abs(True), 1) + self.assertEqual(abs(False), 0) + # other + self.assertRaises(TypeError, abs) + self.assertRaises(TypeError, abs, None) + class AbsClass(object): + def __abs__(self): + return -5 + self.assertEqual(abs(AbsClass()), -5) + + def test_all(self): + self.assertEqual(all([2, 4, 6]), True) + self.assertEqual(all([2, None, 6]), False) + self.assertRaises(RuntimeError, all, [2, TestFailingBool(), 6]) + self.assertRaises(RuntimeError, all, TestFailingIter()) + self.assertRaises(TypeError, all, 10) # Non-iterable + self.assertRaises(TypeError, all) # No args + self.assertRaises(TypeError, all, [2, 4, 
6], []) # Too many args + self.assertEqual(all([]), True) # Empty iterator + self.assertEqual(all([0, TestFailingBool()]), False)# Short-circuit + S = [50, 60] + self.assertEqual(all(x > 42 for x in S), True) + S = [50, 40, 60] + self.assertEqual(all(x > 42 for x in S), False) + + def test_any(self): + self.assertEqual(any([None, None, None]), False) + self.assertEqual(any([None, 4, None]), True) + self.assertRaises(RuntimeError, any, [None, TestFailingBool(), 6]) + self.assertRaises(RuntimeError, any, TestFailingIter()) + self.assertRaises(TypeError, any, 10) # Non-iterable + self.assertRaises(TypeError, any) # No args + self.assertRaises(TypeError, any, [2, 4, 6], []) # Too many args + self.assertEqual(any([]), False) # Empty iterator + self.assertEqual(any([1, TestFailingBool()]), True) # Short-circuit + S = [40, 60, 30] + self.assertEqual(any(x > 42 for x in S), True) + S = [10, 20, 30] + self.assertEqual(any(x > 42 for x in S), False) + + def test_ascii(self): + # Was: self.assertEqual(ascii(''), "''") # '\'\'') + # Heisenbug on Py2.7?! + self.assertEqual(ascii(0), '0') + self.assertEqual(ascii(()), '()') + self.assertEqual(ascii([]), '[]') + self.assertEqual(ascii({}), '{}') + a = [] + a.append(a) + self.assertEqual(ascii(a), '[[...]]') + a = {} + a[0] = a + self.assertEqual(ascii(a), '{0: {...}}') + # Advanced checks for unicode strings + def _check_uni(s): + self.assertEqual(ascii(s), repr(s)) + _check_uni("'") + _check_uni('"') + _check_uni('"\'') + _check_uni('\0') + _check_uni('\r\n\t .') + # Unprintable non-ASCII characters + _check_uni('\x85') + _check_uni('\u1fff') + _check_uni('\U00012fff') + # Lone surrogates + _check_uni('\ud800') + _check_uni('\udfff') + + # Issue #9804: surrogates should be joined even for printable + # wide characters (UCS-2 builds). + + # Fails on Py2.7. Was: + # self.assertEqual(ascii('\U0001d121'), "'\\U0001d121'") + # # All together + # s = "'\0\"\n\r\t abcd\x85é\U00012fff\uD800\U0001D121xxx." 
+ # self.assertEqual(ascii(s), + # r"""'\'\x00"\n\r\t abcd\x85\xe9\U00012fff\ud800\U0001d121xxx.'""") + + def test_neg(self): + x = -sys.maxsize-1 + self.assertTrue(isinstance(x, int)) + self.assertEqual(-x, sys.maxsize+1) + + def test_callable(self): + self.assertTrue(callable(len)) + self.assertFalse(callable("a")) + self.assertTrue(callable(callable)) + self.assertTrue(callable(lambda x, y: x + y)) + self.assertFalse(callable(__builtins__)) + def f(): pass + self.assertTrue(callable(f)) + + class C1(object): # Was: class C1: (old-style class on Py2) + def meth(self): pass + self.assertTrue(callable(C1)) + c = C1() + self.assertTrue(callable(c.meth)) + self.assertFalse(callable(c)) + + # __call__ is looked up on the class, not the instance + c.__call__ = None + self.assertFalse(callable(c)) + c.__call__ = lambda self: 0 + self.assertFalse(callable(c)) + del c.__call__ + self.assertFalse(callable(c)) + + class C2(object): + def __call__(self): pass + c2 = C2() + self.assertTrue(callable(c2)) + c2.__call__ = None + self.assertTrue(callable(c2)) + class C3(C2): pass + c3 = C3() + self.assertTrue(callable(c3)) + + def test_chr(self): + self.assertEqual(chr(32), ' ') + self.assertEqual(chr(65), 'A') + self.assertEqual(chr(97), 'a') + self.assertEqual(chr(0xff), '\xff') + self.assertRaises(ValueError, chr, 1<<24) + self.assertRaises(TypeError, chr) + self.assertEqual(chr(0x0000FFFF), "\U0000FFFF") + self.assertRaises(ValueError, chr, -1) + self.assertRaises(ValueError, chr, 0x00110000) + self.assertRaises((OverflowError, ValueError), chr, 2**32) + + @unittest.skip('FIXME: skip on narrow builds?') + def test_ord_big(self): + """ + These tests seem to fail on OS X (narrow Python build?) 
+ """ + self.assertEqual(chr(sys.maxunicode), + str('\\U0010ffff'.encode("ascii"), 'unicode-escape')) + self.assertEqual(ord("\U0000FFFF"), 0x0000FFFF) + self.assertEqual(ord("\U00010000"), 0x00010000) + self.assertEqual(ord("\U00010001"), 0x00010001) + self.assertEqual(ord("\U000FFFFE"), 0x000FFFFE) + self.assertEqual(ord("\U000FFFFF"), 0x000FFFFF) + self.assertEqual(ord("\U00100000"), 0x00100000) + self.assertEqual(ord("\U00100001"), 0x00100001) + self.assertEqual(ord("\U0010FFFE"), 0x0010FFFE) + self.assertEqual(ord("\U0010FFFF"), 0x0010FFFF) + + @unittest.skip('FIXME: skip on narrow builds?') + def test_chr_big(self): + """ + These tests seem to fail on OS X (narrow Python build?) + """ + self.assertEqual(ord(chr(0x10FFFF)), 0x10FFFF) + self.assertEqual(chr(0x00010000), "\U00010000") + self.assertEqual(chr(0x00010001), "\U00010001") + self.assertEqual(chr(0x000FFFFE), "\U000FFFFE") + self.assertEqual(chr(0x000FFFFF), "\U000FFFFF") + self.assertEqual(chr(0x00100000), "\U00100000") + self.assertEqual(chr(0x00100001), "\U00100001") + self.assertEqual(chr(0x0010FFFE), "\U0010FFFE") + self.assertEqual(chr(0x0010FFFF), "\U0010FFFF") + + def test_compile(self): + compile('print(1)\n', '', 'exec') + bom = b'\xef\xbb\xbf' + compile(bom + b'print(1)\n', '', 'exec') + compile(source='pass', filename='?', mode='exec') + compile(dont_inherit=0, filename='tmp', source='0', mode='eval') + compile('pass', '?', dont_inherit=1, mode='exec') + # Fails on Py2.7: + # Was: compile(memoryview(b"text"), "name", "exec") + self.assertRaises(TypeError, compile) + self.assertRaises(ValueError, compile, 'print(42)\n', '', 'badmode') + self.assertRaises(ValueError, compile, 'print(42)\n', '', 'single', 0xff) + # Raises TypeError in Python < v3.5, ValueError in v3.5, SyntaxError in >= 3.12: + self.assertRaises((TypeError, ValueError, SyntaxError), compile, chr(0), 'f', 'exec') + self.assertRaises(TypeError, compile, 'pass', '?', 'exec', + mode='eval', source='0', filename='tmp') + 
compile('print("\xe5")\n', '', 'exec') + self.assertRaises(ValueError, compile, str('a = 1'), 'f', 'bad') + + # test the optimize argument + # These tests fail on Py2.7 ... + + # codestr = '''def f(): + # """doc""" + # try: + # assert False + # except AssertionError: + # return (True, f.__doc__) + # else: + # return (False, f.__doc__) + # ''' + # def f(): """doc""" + # values = [(-1, __debug__, f.__doc__), + # (0, True, 'doc'), + # (1, False, 'doc'), + # (2, False, None)] + # for optval, debugval, docstring in values: + # # test both direct compilation and compilation via AST + # codeobjs = [] + # codeobjs.append(compile(codestr, "", "exec", optimize=optval)) + # tree = ast.parse(codestr) + # codeobjs.append(compile(tree, "", "exec", optimize=optval)) + # for code in codeobjs: + # ns = {} + # exec_(code, ns) + # rv = ns['f']() + # self.assertEqual(rv, (debugval, docstring)) + + def test_delattr(self): + sys.spam = 1 + delattr(sys, 'spam') + self.assertRaises(TypeError, delattr) + + def test_dir(self): + # dir(wrong number of arguments) + self.assertRaises(TypeError, dir, 42, 42) + + # dir() - local scope + local_var = 1 + self.assertIn('local_var', dir()) + + # dir(module) + self.assertIn('exit', dir(sys)) + + # dir(module_with_invalid__dict__) + class Foo(types.ModuleType): + __dict__ = 8 + f = Foo(native_str("foo")) + self.assertRaises(TypeError, dir, f) + + # dir(type) + self.assertIn("strip", dir(str)) + self.assertNotIn("__mro__", dir(str)) + + # dir(obj) + class Foo(object): + def __init__(self): + self.x = 7 + self.y = 8 + self.z = 9 + f = Foo() + self.assertIn("y", dir(f)) + + # dir(obj_no__dict__) + class Foo(object): + __slots__ = [] + f = Foo() + self.assertIn("__repr__", dir(f)) + + # dir(obj_no__class__with__dict__) + # (an ugly trick to cause getattr(f, "__class__") to fail) + class Foo(object): + __slots__ = ["__class__", "__dict__"] + def __init__(self): + self.bar = "wow" + f = Foo() + self.assertNotIn("__repr__", dir(f)) + self.assertIn("bar", 
dir(f)) + + # dir(obj_using __dir__) + class Foo(object): + def __dir__(self): + return ["kan", "ga", "roo"] + f = Foo() + self.assertTrue(dir(f) == ["ga", "kan", "roo"]) + + # dir(obj__dir__tuple) + # Was: + # class Foo(object): + # def __dir__(self): + # return ("b", "c", "a") + # res = dir(Foo()) + # self.assertIsInstance(res, list) + # self.assertTrue(res == ["a", "b", "c"]) + + # dir(obj__dir__not_sequence) + class Foo(object): + def __dir__(self): + return 7 + f = Foo() + self.assertRaises(TypeError, dir, f) + + # These tests fail on Py2: + # # dir(traceback) + # try: + # raise IndexError + # except: + # self.assertEqual(len(dir(sys.exc_info()[2])), 4) + # + # # test that object has a __dir__() + # self.assertEqual(sorted([].__dir__()), dir([])) + + def test_divmod(self): + self.assertEqual(divmod(12, 7), (1, 5)) + self.assertEqual(divmod(-12, 7), (-2, 2)) + self.assertEqual(divmod(12, -7), (-2, -2)) + self.assertEqual(divmod(-12, -7), (1, -5)) + + self.assertEqual(divmod(-sys.maxsize-1, -1), (sys.maxsize+1, 0)) + + for num, denom, exp_result in [ (3.25, 1.0, (3.0, 0.25)), + (-3.25, 1.0, (-4.0, 0.75)), + (3.25, -1.0, (-4.0, -0.75)), + (-3.25, -1.0, (3.0, -0.25))]: + result = divmod(num, denom) + self.assertAlmostEqual(result[0], exp_result[0]) + self.assertAlmostEqual(result[1], exp_result[1]) + + self.assertRaises(TypeError, divmod) + + def test_eval(self): + self.assertEqual(eval('1+1'), 2) + self.assertEqual(eval(' 1+1\n'), 2) + globals = {'a': 1, 'b': 2} + locals = {'b': 200, 'c': 300} + self.assertEqual(eval('a', globals) , 1) + self.assertEqual(eval('a', globals, locals), 1) + self.assertEqual(eval('b', globals, locals), 200) + self.assertEqual(eval('c', globals, locals), 300) + globals = {'a': 1, 'b': 2} + locals = {'b': 200, 'c': 300} + bom = b'\xef\xbb\xbf' + self.assertEqual(eval(bom + b'a', globals, locals), 1) + self.assertEqual(eval('"\xe5"', globals), "\xe5") + self.assertRaises(TypeError, eval) + self.assertRaises(TypeError, eval, ()) + 
self.assertRaises(SyntaxError, eval, bom[:2] + b'a') + + def test_general_eval(self): + # Tests that general mappings can be used for the locals argument + + class M: + "Test mapping interface versus possible calls from eval()." + def __getitem__(self, key): + if key == 'a': + return 12 + raise KeyError + def keys(self): + return list('xyz') + + m = M() + g = globals() + self.assertEqual(eval('a', g, m), 12) + self.assertRaises(NameError, eval, 'b', g, m) + self.assertEqual(eval('dir()', g, m), list('xyz')) + self.assertEqual(eval('globals()', g, m), g) + self.assertEqual(eval('locals()', g, m), m) + self.assertRaises(TypeError, eval, 'a', m) + class A: + "Non-mapping" + pass + m = A() + self.assertRaises(TypeError, eval, 'a', g, m) + + # Verify that dict subclasses work as well + class D(dict): + def __getitem__(self, key): + if key == 'a': + return 12 + return dict.__getitem__(self, key) + def keys(self): + return list('xyz') + + d = D() + self.assertEqual(eval('a', g, d), 12) + self.assertRaises(NameError, eval, 'b', g, d) + self.assertEqual(eval('dir()', g, d), list('xyz')) + self.assertEqual(eval('globals()', g, d), g) + self.assertEqual(eval('locals()', g, d), d) + + # Verify locals stores (used by list comps) + eval('[locals() for i in (2,3)]', g, d) + if PY3: + from collections import UserDict + else: + from UserDict import UserDict + eval('[locals() for i in (2,3)]', g, UserDict()) + + class SpreadSheet: + "Sample application showing nested, calculated lookups." 
+ _cells = {} + def __setitem__(self, key, formula): + self._cells[key] = formula + def __getitem__(self, key): + return eval(self._cells[key], globals(), self) + + ss = SpreadSheet() + ss['a1'] = '5' + ss['a2'] = 'a1*6' + ss['a3'] = 'a2*7' + self.assertEqual(ss['a3'], 210) + + # Verify that dir() catches a non-list returned by eval + # SF bug #1004669 + class C: + def __getitem__(self, item): + raise KeyError(item) + def keys(self): + return 1 # used to be 'a' but that's no longer an error + self.assertRaises(TypeError, eval, 'dir()', globals(), C()) + + def test_exec_(self): + g = {} + exec_('z = 1', g) + if '__builtins__' in g: + del g['__builtins__'] + self.assertEqual(g, {'z': 1}) + + exec_('z = 1+1', g) + if '__builtins__' in g: + del g['__builtins__'] + self.assertEqual(g, {'z': 2}) + g = {} + l = {} + + with check_warnings(): + warnings.filterwarnings("ignore", "global statement", + module="") + exec_('global a; a = 1; b = 2', g, l) + if '__builtins__' in g: + del g['__builtins__'] + if '__builtins__' in l: + del l['__builtins__'] + self.assertEqual((g, l), ({'a': 1}, {'b': 2})) + + def test_exec_globals(self): + code = compile("print('Hello World!')", "", "exec") + # no builtin function + # Was: + # self.assertRaisesRegex(NameError, "name 'print' is not defined", + # exec_, code, {'__builtins__': {}}) + # Now: + self.assertRaises(NameError, + exec_, code, {'__builtins__': {}}) + # __builtins__ must be a mapping type + # Was: + # self.assertRaises(TypeError, + # exec_, code, {'__builtins__': 123}) + # Raises a NameError again on Py2 + + # no __build_class__ function + code = compile("class A: pass", "", "exec") + # Was: + # self.assertRaisesRegex(NameError, "__build_class__ not found", + # exec_, code, {'__builtins__': {}}) + self.assertRaises(NameError, + exec_, code, {'__builtins__': {}}) + + class frozendict_error(Exception): + pass + + class frozendict(dict): + def __setitem__(self, key, value): + raise frozendict_error("frozendict is readonly") + + # 
This test seems to fail with "TypeError: 'module' object is not iterable": + # # read-only builtins + # frozen_builtins = frozendict(__builtins__) + # code = compile("__builtins__['superglobal']=2; print(superglobal)", "test", "exec") + # self.assertRaises(frozendict_error, + # exec_, code, {'__builtins__': frozen_builtins}) + + # read-only globals + namespace = frozendict({}) + code = compile("x=1", "test", "exec") + self.assertRaises(frozendict_error, + exec_, code, namespace) + + def test_exec_redirected(self): + savestdout = sys.stdout + sys.stdout = None # Whatever that cannot flush() + try: + # Used to raise SystemError('error return without exception set') + exec_('a') + except NameError: + pass + finally: + sys.stdout = savestdout + + def test_filter(self): + self.assertEqual(list(filter(lambda c: 'a' <= c <= 'z', 'Hello World')), list('elloorld')) + self.assertEqual(list(filter(None, [1, 'hello', [], [3], '', None, 9, 0])), [1, 'hello', [3], 9]) + self.assertEqual(list(filter(lambda x: x > 0, [1, -3, 9, 0, 2])), [1, 9, 2]) + self.assertEqual(list(filter(None, Squares(10))), [1, 4, 9, 16, 25, 36, 49, 64, 81]) + self.assertEqual(list(filter(lambda x: x%2, Squares(10))), [1, 9, 25, 49, 81]) + def identity(item): + return 1 + filter(identity, Squares(5)) + self.assertRaises(TypeError, filter) + class BadSeq(object): + def __getitem__(self, index): + if index<4: + return 42 + raise ValueError + self.assertRaises(ValueError, list, filter(lambda x: x, BadSeq())) + def badfunc(): + pass + self.assertRaises(TypeError, list, filter(badfunc, range(5))) + + # test bltinmodule.c::filtertuple() + self.assertEqual(list(filter(None, (1, 2))), [1, 2]) + self.assertEqual(list(filter(lambda x: x>=3, (1, 2, 3, 4))), [3, 4]) + self.assertRaises(TypeError, list, filter(42, (1, 2))) + + @expectedFailurePY2 + def test_filter_pickle(self): + f1 = filter(filter_char, "abcdeabcde") + f2 = filter(filter_char, "abcdeabcde") + self.check_iter_pickle(f1, list(f2)) + + def 
test_getattr(self): + self.assertTrue(getattr(sys, 'stdout') is sys.stdout) + self.assertRaises(TypeError, getattr, sys, 1) + self.assertRaises(TypeError, getattr, sys, 1, "foo") + self.assertRaises(TypeError, getattr) + # These tests fail on Py2: + # self.assertRaises(AttributeError, getattr, sys, chr(sys.maxunicode)) + # unicode surrogates are not encodable to the default encoding (utf8) + # self.assertRaises(AttributeError, getattr, 1, "\uDAD1\uD51E") + # This test fails on Py2 + + def test_hasattr(self): + self.assertTrue(hasattr(sys, 'stdout')) + self.assertRaises(TypeError, hasattr, sys, 1) + self.assertRaises(TypeError, hasattr) + # Fails on Py2: + # self.assertEqual(False, hasattr(sys, chr(sys.maxunicode))) + + # Check that hasattr propagates all exceptions outside of + # AttributeError. + class A(object): + def __getattr__(self, what): + raise SystemExit + self.assertRaises(SystemExit, hasattr, A(), "b") + class B(object): + def __getattr__(self, what): + raise ValueError + # Was: self.assertRaises(ValueError, hasattr, B(), "b") + # Fails on Py2 + + def test_hash(self): + hash(None) + self.assertEqual(hash(1), hash(1)) + self.assertEqual(hash(1), hash(1.0)) + hash('spam') + self.assertEqual(hash('spam'), hash(b'spam')) + hash((0,1,2,3)) + def f(): pass + self.assertRaises(TypeError, hash, []) + self.assertRaises(TypeError, hash, {}) + # Bug 1536021: Allow hash to return long objects + class X: + def __hash__(self): + return 2**100 + self.assertTrue(isinstance(hash(X()), int)) + class Z(int): + def __hash__(self): + return self + self.assertEqual(hash(Z(42)), hash(42)) + + def test_hex(self): + self.assertEqual(hex(16), '0x10') + self.assertEqual(hex(-16), '-0x10') + self.assertRaises(TypeError, hex, {}) + + def test_id(self): + id(None) + id(1) + id(1.0) + id('spam') + id((0,1,2,3)) + id([0,1,2,3]) + id({'spam': 1, 'eggs': 2, 'ham': 3}) + + # Test input() later, alphabetized as if it were raw_input + + def test_iter(self): + self.assertRaises(TypeError, 
iter) + self.assertRaises(TypeError, iter, 42, 42) + lists = [("1", "2"), ["1", "2"], "12"] + for l in lists: + i = iter(l) + self.assertEqual(next(i), '1') + self.assertEqual(next(i), '2') + self.assertRaises(StopIteration, next, i) + + def test_isinstance(self): + class C: + pass + class D(C): + pass + class E: + pass + c = C() + d = D() + e = E() + self.assertTrue(isinstance(c, C)) + self.assertTrue(isinstance(d, C)) + self.assertTrue(not isinstance(e, C)) + self.assertTrue(not isinstance(c, D)) + self.assertTrue(not isinstance('foo', E)) + self.assertRaises(TypeError, isinstance, E, 'foo') + self.assertRaises(TypeError, isinstance) + + def test_issubclass(self): + class C: + pass + class D(C): + pass + class E: + pass + c = C() + d = D() + e = E() + self.assertTrue(issubclass(D, C)) + self.assertTrue(issubclass(C, C)) + self.assertTrue(not issubclass(C, D)) + self.assertRaises(TypeError, issubclass, 'foo', E) + self.assertRaises(TypeError, issubclass, E, 'foo') + self.assertRaises(TypeError, issubclass) + + def test_len(self): + self.assertEqual(len('123'), 3) + self.assertEqual(len(()), 0) + self.assertEqual(len((1, 2, 3, 4)), 4) + self.assertEqual(len([1, 2, 3, 4]), 4) + self.assertEqual(len({}), 0) + self.assertEqual(len({'a':1, 'b': 2}), 2) + class BadSeq: + def __len__(self): + raise ValueError + self.assertRaises(ValueError, len, BadSeq()) + class InvalidLen: + def __len__(self): + return None + self.assertRaises(TypeError, len, InvalidLen()) + class FloatLen: + def __len__(self): + return 4.5 + self.assertRaises(TypeError, len, FloatLen()) + class HugeLen: + def __len__(self): + return sys.maxsize + 1 + # Was: self.assertRaises(OverflowError, len, HugeLen()) + class NoLenMethod(object): pass + self.assertRaises(TypeError, len, NoLenMethod()) + + def test_map(self): + self.assertEqual( + list(map(lambda x: x*x, range(1,4))), + [1, 4, 9] + ) + try: + from math import sqrt + except ImportError: + def sqrt(x): + return pow(x, 0.5) + self.assertEqual( + 
list(map(lambda x: list(map(sqrt, x)), [[16, 4], [81, 9]])), + [[4.0, 2.0], [9.0, 3.0]] + ) + self.assertEqual( + list(map(lambda x, y: x+y, [1,3,2], [9,1,4])), + [10, 4, 6] + ) + + def plus(*v): + accu = 0 + for i in v: accu = accu + i + return accu + self.assertEqual( + list(map(plus, [1, 3, 7])), + [1, 3, 7] + ) + self.assertEqual( + list(map(plus, [1, 3, 7], [4, 9, 2])), + [1+4, 3+9, 7+2] + ) + self.assertEqual( + list(map(plus, [1, 3, 7], [4, 9, 2], [1, 1, 0])), + [1+4+1, 3+9+1, 7+2+0] + ) + self.assertEqual( + list(map(int, Squares(10))), + [0, 1, 4, 9, 16, 25, 36, 49, 64, 81] + ) + def Max(a, b): + if a is None: + return b + if b is None: + return a + return max(a, b) + self.assertEqual( + list(map(Max, Squares(3), Squares(2))), + [0, 1] + ) + self.assertRaises(TypeError, map) + self.assertRaises(TypeError, map, lambda x: x, 42) + class BadSeq: + def __iter__(self): + raise ValueError + yield None + self.assertRaises(ValueError, list, map(lambda x: x, BadSeq())) + def badfunc(x): + raise RuntimeError + self.assertRaises(RuntimeError, list, map(badfunc, range(5))) + + @expectedFailurePY2 + def test_map_pickle(self): + m1 = map(map_char, "Is this the real life?") + m2 = map(map_char, "Is this the real life?") + self.check_iter_pickle(m1, list(m2)) + + def test_max(self): + self.assertEqual(max('123123'), '3') + self.assertEqual(max(1, 2, 3), 3) + self.assertEqual(max((1, 2, 3, 1, 2, 3)), 3) + self.assertEqual(max([1, 2, 3, 1, 2, 3]), 3) + + self.assertEqual(max(1, 2, 3.0), 3.0) + self.assertEqual(max(1, 2.0, 3), 3) + self.assertEqual(max(1.0, 2, 3), 3) + + for stmt in ( + "max(key=int)", # no args + "max(1, key=int)", # single arg not iterable + "max(1, 2, keystone=int)", # wrong keyword + "max(1, 2, key=int, abc=int)", # two many keywords + "max(1, 2, key=1)", # keyfunc is not callable + ): + try: + exec_(stmt, globals()) + except TypeError: + pass + else: + self.fail(stmt) + + self.assertEqual(max((1,), key=neg), 1) # one elem iterable + 
self.assertEqual(max((1,2), key=neg), 1) # two elem iterable + self.assertEqual(max(1, 2, key=neg), 1) # two elems + + data = [random.randrange(200) for i in range(100)] + keys = dict((elem, random.randrange(50)) for elem in data) + f = keys.__getitem__ + self.assertEqual(max(data, key=f), + sorted(reversed(data), key=f)[-1]) + + self.assertEqual(max([], default=5), 5) + with self.assertRaises(TypeError): + max(None, default=5) + with self.assertRaises(TypeError): + max(1, 2, default=0) + self.assertEqual(max([], default=0), 0) + self.assertIs(max([], default=None), None) + + def test_min(self): + self.assertEqual(min('123123'), '1') + self.assertEqual(min(1, 2, 3), 1) + self.assertEqual(min((1, 2, 3, 1, 2, 3)), 1) + self.assertEqual(min([1, 2, 3, 1, 2, 3]), 1) + + self.assertEqual(min(1, 2, 3.0), 1) + self.assertEqual(min(1, 2.0, 3), 1) + self.assertEqual(min(1.0, 2, 3), 1.0) + + self.assertRaises(TypeError, min) + self.assertRaises(TypeError, min, 42) + self.assertRaises(ValueError, min, ()) + class BadSeq: + def __getitem__(self, index): + raise ValueError + self.assertRaises(ValueError, min, BadSeq()) + self.assertEqual(max(x for x in [5, 4, 3]), 5) + + for stmt in ( + "min(key=int)", # no args + "min(1, key=int)", # single arg not iterable + "min(1, 2, keystone=int)", # wrong keyword + "min(1, 2, key=int, abc=int)", # two many keywords + "min(1, 2, key=1)", # keyfunc is not callable + ): + try: + exec_(stmt, globals()) + except TypeError: + pass + else: + self.fail(stmt) + + self.assertEqual(min((1,), key=neg), 1) # one elem iterable + self.assertEqual(min((1,2), key=neg), 2) # two elem iterable + self.assertEqual(min(1, 2, key=neg), 2) # two elems + + data = [random.randrange(200) for i in range(100)] + keys = dict((elem, random.randrange(50)) for elem in data) + f = keys.__getitem__ + self.assertEqual(min(data, key=f), + sorted(data, key=f)[0]) + self.assertEqual(min([], default=5), 5) + self.assertEqual(min([], default=0), 0) + self.assertIs(min([], 
default=None), None) + with self.assertRaises(TypeError): + max(None, default=5) + with self.assertRaises(TypeError): + max(1, 2, default=0) + + # Test iterables that can only be looped once #510 + self.assertEqual(min(x for x in [5]), 5) + + def test_next(self): + it = iter(range(2)) + self.assertEqual(next(it), 0) + self.assertEqual(next(it), 1) + self.assertRaises(StopIteration, next, it) + self.assertRaises(StopIteration, next, it) + self.assertEqual(next(it, 42), 42) + + class Iter(object): + def __iter__(self): + return self + def __next__(self): + raise StopIteration + + # Was: it = iter(Iter()) + # Needs this on Py2: + Iter = implements_iterator(Iter) + it = iter(Iter()) + self.assertEqual(next(it, 42), 42) + self.assertRaises(StopIteration, next, it) + + def gen(): + yield 1 + return + + it = gen() + self.assertEqual(next(it), 1) + self.assertRaises(StopIteration, next, it) + self.assertEqual(next(it, 42), 42) + + def test_oct(self): + self.assertEqual(oct(100), '0o144') + self.assertEqual(oct(-100), '-0o144') + self.assertRaises(TypeError, oct, ()) + + def write_testfile(self): + # NB the first 4 lines are also used to test input, below + fp = open(TESTFN, 'w') + try: + fp.write('1+1\n') + fp.write('The quick brown fox jumps over the lazy dog') + fp.write('.\n') + fp.write('Dear John\n') + fp.write('XXX'*100) + fp.write('YYY'*100) + finally: + fp.close() + + def test_open(self): + self.write_testfile() + fp = open(TESTFN, 'r') + try: + self.assertEqual(fp.readline(4), '1+1\n') + self.assertEqual(fp.readline(), 'The quick brown fox jumps over the lazy dog.\n') + self.assertEqual(fp.readline(4), 'Dear') + self.assertEqual(fp.readline(100), ' John\n') + self.assertEqual(fp.read(300), 'XXX'*100) + self.assertEqual(fp.read(1000), 'YYY'*100) + finally: + fp.close() + unlink(TESTFN) + + def test_open_default_encoding(self): + old_environ = dict(os.environ) + try: + # try to get a user preferred encoding different than the current + # locale encoding to check 
that open() uses the current locale + # encoding and not the user preferred encoding + for key in ('LC_ALL', 'LANG', 'LC_CTYPE'): + if key in os.environ: + del os.environ[key] + + self.write_testfile() + current_locale_encoding = locale.getpreferredencoding(False) + fp = open(TESTFN, 'w') + try: + self.assertEqual(fp.encoding, current_locale_encoding) + finally: + fp.close() + unlink(TESTFN) + finally: + os.environ.clear() + os.environ.update(old_environ) + + def test_ord(self): + self.assertEqual(ord(' '), 32) + self.assertEqual(ord('A'), 65) + self.assertEqual(ord('a'), 97) + self.assertEqual(ord('\x80'), 128) + self.assertEqual(ord('\xff'), 255) + + self.assertEqual(ord(b' '), 32) + self.assertEqual(ord(b'A'), 65) + self.assertEqual(ord(b'a'), 97) + self.assertEqual(ord(b'\x80'), 128) + self.assertEqual(ord(b'\xff'), 255) + + self.assertEqual(ord(chr(sys.maxunicode)), sys.maxunicode) + self.assertRaises(TypeError, ord, 42) + + def test_pow(self): + self.assertEqual(pow(0,0), 1) + self.assertEqual(pow(0,1), 0) + self.assertEqual(pow(1,0), 1) + self.assertEqual(pow(1,1), 1) + + self.assertEqual(pow(2,0), 1) + self.assertEqual(pow(2,10), 1024) + self.assertEqual(pow(2,20), 1024*1024) + self.assertEqual(pow(2,30), 1024*1024*1024) + + self.assertEqual(pow(-2,0), 1) + self.assertEqual(pow(-2,1), -2) + self.assertEqual(pow(-2,2), 4) + self.assertEqual(pow(-2,3), -8) + + self.assertAlmostEqual(pow(0.,0), 1.) + self.assertAlmostEqual(pow(0.,1), 0.) + self.assertAlmostEqual(pow(1.,0), 1.) + self.assertAlmostEqual(pow(1.,1), 1.) + + self.assertAlmostEqual(pow(2.,0), 1.) + self.assertAlmostEqual(pow(2.,10), 1024.) + self.assertAlmostEqual(pow(2.,20), 1024.*1024.) + self.assertAlmostEqual(pow(2.,30), 1024.*1024.*1024.) + + self.assertAlmostEqual(pow(-2.,0), 1.) + self.assertAlmostEqual(pow(-2.,1), -2.) + self.assertAlmostEqual(pow(-2.,2), 4.) + self.assertAlmostEqual(pow(-2.,3), -8.) 
+ + for x in 2, int(2), 2.0: + for y in 10, int(10), 10.0: + for z in 1000, int(1000), 1000.0: + if isinstance(x, float) or \ + isinstance(y, float) or \ + isinstance(z, float): + self.assertRaises(TypeError, pow, x, y, z) + else: + self.assertAlmostEqual(pow(x, y, z), 24.0) + + self.assertAlmostEqual(pow(-1, 0.5), 1j) + self.assertAlmostEqual(pow(-1, 1/3), 0.5 + 0.8660254037844386j) + + # Raises TypeError in Python < v3.5, ValueError in v3.5-v3.7: + if sys.version_info[:2] < (3, 8): + self.assertRaises((TypeError, ValueError), pow, -1, -2, 3) + else: + # Changed in version 3.8: For int operands, the three-argument form + # of pow now allows the second argument to be negative, permitting + # computation of modular inverses. + self.assertEqual(pow(-1, -2, 3), 1) + self.assertRaises(ValueError, pow, 1, 2, 0) + + self.assertRaises(TypeError, pow) + + def test_input(self): + self.write_testfile() + fp = open(TESTFN, 'r') + savestdin = sys.stdin + savestdout = sys.stdout # Eats the echo + try: + sys.stdin = fp + sys.stdout = BitBucket() + self.assertEqual(input(), "1+1") + self.assertEqual(input(), 'The quick brown fox jumps over the lazy dog.') + self.assertEqual(input('testing\n'), 'Dear John') + + # SF 1535165: don't segfault on closed stdin + # sys.stdout must be a regular file for triggering + sys.stdout = savestdout + sys.stdin.close() + self.assertRaises(ValueError, input) + + sys.stdout = BitBucket() + sys.stdin = io.StringIO("NULL\0") + self.assertRaises(TypeError, input, 42, 42) + sys.stdin = io.StringIO(" 'whitespace'") + self.assertEqual(input(), " 'whitespace'") + sys.stdin = io.StringIO() + self.assertRaises(EOFError, input) + + del sys.stdout + self.assertRaises(RuntimeError, input, 'prompt') + del sys.stdin + self.assertRaises(RuntimeError, input, 'prompt') + finally: + sys.stdin = savestdin + sys.stdout = savestdout + fp.close() + unlink(TESTFN) + + @expectedFailurePY2 + @unittest.skipUnless(pty, "the pty and signal modules must be available") + def 
check_input_tty(self, prompt, terminal_input, stdio_encoding=None): + if not sys.stdin.isatty() or not sys.stdout.isatty(): + self.skipTest("stdin and stdout must be ttys") + r, w = os.pipe() + try: + pid, fd = pty.fork() + except (OSError, AttributeError) as e: + os.close(r) + os.close(w) + self.skipTest("pty.fork() raised {0}".format(e)) + if pid == 0: + # Child + try: + # Make sure we don't get stuck if there's a problem + signal.alarm(2) + os.close(r) + # Check the error handlers are accounted for + if stdio_encoding: + sys.stdin = io.TextIOWrapper(sys.stdin.detach(), + encoding=stdio_encoding, + errors='surrogateescape') + sys.stdout = io.TextIOWrapper(sys.stdout.detach(), + encoding=stdio_encoding, + errors='replace') + with open(w, "w") as wpipe: + print("tty =", sys.stdin.isatty() and sys.stdout.isatty(), file=wpipe) + print(ascii(input(prompt)), file=wpipe) + except: + traceback.print_exc() + finally: + # We don't want to return to unittest... + os._exit(0) + # Parent + os.close(w) + os.write(fd, terminal_input + b"\r\n") + # Get results from the pipe + with open(r, "r") as rpipe: + lines = [] + while True: + line = rpipe.readline().strip() + if line == "": + # The other end was closed => the child exited + break + lines.append(line) + # Check the result was got and corresponds to the user's terminal input + if len(lines) != 2: + # Something went wrong, try to get at stderr + with open(fd, "r", encoding="ascii", errors="ignore") as child_output: + self.fail("got %d lines in pipe but expected 2, child output was:\n%s" + % (len(lines), child_output.read())) + os.close(fd) + # Check we did exercise the GNU readline path + self.assertIn(lines[0], set(['tty = True', 'tty = False'])) + if lines[0] != 'tty = True': + self.skipTest("standard IO in should have been a tty") + input_result = eval(lines[1]) # ascii() -> eval() roundtrip + if stdio_encoding: + expected = terminal_input.decode(stdio_encoding, 'surrogateescape') + else: + expected = 
terminal_input.decode(sys.stdin.encoding) # what else? + self.assertEqual(input_result, expected) + + @expectedFailurePY26 + def test_input_tty(self): + # Test input() functionality when wired to a tty (the code path + # is different and invokes GNU readline if available). + self.check_input_tty("prompt", b"quux") + + @expectedFailurePY26 + def test_input_tty_non_ascii(self): + # Check stdin/stdout encoding is used when invoking GNU readline + self.check_input_tty("prompté", b"quux\xe9", "utf-8") + + @expectedFailurePY26 + def test_input_tty_non_ascii_unicode_errors(self): + # Check stdin/stdout error handler is used when invoking GNU readline + self.check_input_tty("prompté", b"quux\xe9", "ascii") + + # test_int(): see test_int.py for tests of built-in function int(). + + def test_repr(self): + # Was: self.assertEqual(repr(''), "\'\'") + # Why is this failing on Py2.7? A Heisenbug ... + self.assertEqual(repr(0), '0') + self.assertEqual(repr(()), '()') + self.assertEqual(repr([]), '[]') + self.assertEqual(repr({}), '{}') + + # Future versions of the above: + self.assertEqual(repr(str('')), '\'\'') + self.assertEqual(repr(int(0)), '0') + self.assertEqual(repr(dict({})), '{}') + self.assertEqual(repr(dict()), '{}') + + a = [] + a.append(a) + self.assertEqual(repr(a), '[[...]]') + a = {} + a[0] = a + self.assertEqual(repr(a), '{0: {...}}') + + @expectedFailurePY2 + def test_round(self): + self.assertEqual(round(0.0), 0.0) + # Was: self.assertEqual(type(round(0.0)), int) + # Now: + self.assertTrue(isinstance(round(0.0), int)) + self.assertEqual(round(1.0), 1.0) + self.assertEqual(round(10.0), 10.0) + self.assertEqual(round(1000000000.0), 1000000000.0) + self.assertEqual(round(1e20), 1e20) + + self.assertEqual(round(-1.0), -1.0) + self.assertEqual(round(-10.0), -10.0) + self.assertEqual(round(-1000000000.0), -1000000000.0) + self.assertEqual(round(-1e20), -1e20) + + self.assertEqual(round(0.1), 0.0) + self.assertEqual(round(1.1), 1.0) + self.assertEqual(round(10.1), 
10.0) + self.assertEqual(round(1000000000.1), 1000000000.0) + + self.assertEqual(round(-1.1), -1.0) + self.assertEqual(round(-10.1), -10.0) + self.assertEqual(round(-1000000000.1), -1000000000.0) + + self.assertEqual(round(0.9), 1.0) + self.assertEqual(round(9.9), 10.0) + self.assertEqual(round(999999999.9), 1000000000.0) + + self.assertEqual(round(-0.9), -1.0) + self.assertEqual(round(-9.9), -10.0) + self.assertEqual(round(-999999999.9), -1000000000.0) + + self.assertEqual(round(-8.0, -1), -10.0) + self.assertEqual(type(round(-8.0, -1)), float) + + self.assertEqual(type(round(-8.0, 0)), float) + self.assertEqual(type(round(-8.0, 1)), float) + + # Check even / odd rounding behaviour + self.assertEqual(round(5.5), 6) + self.assertEqual(round(6.5), 6) + self.assertEqual(round(-5.5), -6) + self.assertEqual(round(-6.5), -6) + + # Check behavior on ints + self.assertEqual(round(0), 0) + self.assertEqual(round(8), 8) + self.assertEqual(round(-8), -8) + # Was: + # self.assertEqual(type(round(0)), int) + # self.assertEqual(type(round(-8, -1)), int) + # self.assertEqual(type(round(-8, 0)), int) + # self.assertEqual(type(round(-8, 1)), int) + # Now: + self.assertTrue(isinstance(round(0), int)) + self.assertTrue(isinstance(round(-8, -1), int)) + self.assertTrue(isinstance(round(-8, 0), int)) + self.assertTrue(isinstance(round(-8, 1), int)) + + # test new kwargs + self.assertEqual(round(number=-8.0, ndigits=-1), -10.0) + + self.assertRaises(TypeError, round) + + # test generic rounding delegation for reals + class TestRound: + def __round__(self): + return 23 + + class TestNoRound: + pass + + self.assertEqual(round(TestRound()), 23) + + self.assertRaises(TypeError, round, 1, 2, 3) + self.assertRaises(TypeError, round, TestNoRound()) + + t = TestNoRound() + t.__round__ = lambda *args: args + self.assertRaises(TypeError, round, t) + self.assertRaises(TypeError, round, t, 0) + + # # Some versions of glibc for alpha have a bug that affects + # # float -> integer rounding (floor, 
ceil, rint, round) for + # # values in the range [2**52, 2**53). See: + # # + # # http://sources.redhat.com/bugzilla/show_bug.cgi?id=5350 + # # + # # We skip this test on Linux/alpha if it would fail. + # linux_alpha = (platform.system().startswith('Linux') and + # platform.machine().startswith('alpha')) + # system_round_bug = round(5e15+1) != 5e15+1 + # @unittest.skipIf(PY26)linux_alpha and system_round_bug, + # "test will fail; failure is probably due to a " + # "buggy system round function") + @skip26 + def test_round_large(self): + # Issue #1869: integral floats should remain unchanged + self.assertEqual(round(5e15-1), 5e15-1) + self.assertEqual(round(5e15), 5e15) + self.assertEqual(round(5e15+1), 5e15+1) + self.assertEqual(round(5e15+2), 5e15+2) + self.assertEqual(round(5e15+3), 5e15+3) + + def test_setattr(self): + setattr(sys, 'spam', 1) + self.assertEqual(sys.spam, 1) + self.assertRaises(TypeError, setattr, sys, 1, 'spam') + self.assertRaises(TypeError, setattr) + + # test_str(): see test_unicode.py and test_bytes.py for str() tests. + + def test_sum(self): + self.assertEqual(sum([]), 0) + self.assertEqual(sum(list(range(2,8))), 27) + self.assertEqual(sum(iter(list(range(2,8)))), 27) + self.assertEqual(sum(Squares(10)), 285) + self.assertEqual(sum(iter(Squares(10))), 285) + self.assertEqual(sum([[1], [2], [3]], []), [1, 2, 3]) + + self.assertRaises(TypeError, sum) + self.assertRaises(TypeError, sum, 42) + self.assertRaises(TypeError, sum, ['a', 'b', 'c']) + self.assertRaises(TypeError, sum, ['a', 'b', 'c'], '') + self.assertRaises(TypeError, sum, [b'a', b'c'], b'') + # Was: + # values = [bytearray(b'a'), bytearray(b'b')] + # self.assertRaises(TypeError, sum, values, bytearray(b'')) + # Currently fails on Py2 -- i.e. 
sum(values, bytearray(b'')) is allowed + self.assertRaises(TypeError, sum, [[1], [2], [3]]) + self.assertRaises(TypeError, sum, [{2:3}]) + self.assertRaises(TypeError, sum, [{2:3}]*2, {2:3}) + + class BadSeq: + def __getitem__(self, index): + raise ValueError + self.assertRaises(ValueError, sum, BadSeq()) + + empty = [] + sum(([x] for x in range(10)), empty) + self.assertEqual(empty, []) + + def test_type(self): + self.assertEqual(type(''), type('123')) + self.assertNotEqual(type(''), type(())) + + # We don't want self in vars(), so these are static methods + + @staticmethod + def get_vars_f0(): + return vars() + + @staticmethod + def get_vars_f2(): + BuiltinTest.get_vars_f0() + a = 1 + b = 2 + return vars() + + class C_get_vars(object): + def getDict(self): + return {'a':2} + __dict__ = property(fget=getDict) + + def test_vars(self): + self.assertEqual(set(vars()), set(dir())) + self.assertEqual(set(vars(sys)), set(dir(sys))) + self.assertEqual(self.get_vars_f0(), {}) + self.assertEqual(self.get_vars_f2(), {'a': 1, 'b': 2}) + self.assertRaises(TypeError, vars, 42, 42) + self.assertRaises(TypeError, vars, 42) + self.assertEqual(vars(self.C_get_vars()), {'a':2}) + + def test_zip(self): + a = (1, 2, 3) + b = (4, 5, 6) + t = [(1, 4), (2, 5), (3, 6)] + self.assertEqual(list(zip(a, b)), t) + b = [4, 5, 6] + self.assertEqual(list(zip(a, b)), t) + b = (4, 5, 6, 7) + self.assertEqual(list(zip(a, b)), t) + class I: + def __getitem__(self, i): + if i < 0 or i > 2: raise IndexError + return i + 4 + self.assertEqual(list(zip(a, I())), t) + self.assertEqual(list(zip()), []) + self.assertEqual(list(zip(*[])), []) + self.assertRaises(TypeError, zip, None) + class G: + pass + self.assertRaises(TypeError, zip, a, G()) + self.assertRaises(RuntimeError, zip, a, TestFailingIter()) + + # Make sure zip doesn't try to allocate a billion elements for the + # result list when one of its arguments doesn't say how long it is. + # A MemoryError is the most likely failure mode. 
+ class SequenceWithoutALength: + def __getitem__(self, i): + if i == 5: + raise IndexError + else: + return i + self.assertEqual( + list(zip(SequenceWithoutALength(), range(2**30))), + list(enumerate(range(5))) + ) + + class BadSeq: + def __getitem__(self, i): + if i == 5: + raise ValueError + else: + return i + self.assertRaises(ValueError, list, zip(BadSeq(), BadSeq())) + + @expectedFailurePY2 + def test_zip_pickle(self): + a = (1, 2, 3) + b = (4, 5, 6) + t = [(1, 4), (2, 5), (3, 6)] + z1 = zip(a, b) + self.check_iter_pickle(z1, t) + + def test_format(self): + # Test the basic machinery of the format() builtin. Don't test + # the specifics of the various formatters + self.assertEqual(format(3, ''), '3') + + # Returns some classes to use for various tests. There's + # an old-style version, and a new-style version + def classes_new(): + class A(object): + def __init__(self, x): + self.x = x + def __format__(self, format_spec): + return str(self.x) + format_spec + class DerivedFromA(A): + pass + + class Simple(object): pass + class DerivedFromSimple(Simple): + def __init__(self, x): + self.x = x + def __format__(self, format_spec): + return str(self.x) + format_spec + class DerivedFromSimple2(DerivedFromSimple): pass + return A, DerivedFromA, DerivedFromSimple, DerivedFromSimple2 + + def class_test(A, DerivedFromA, DerivedFromSimple, DerivedFromSimple2): + self.assertEqual(format(A(3), 'spec'), '3spec') + self.assertEqual(format(DerivedFromA(4), 'spec'), '4spec') + self.assertEqual(format(DerivedFromSimple(5), 'abc'), '5abc') + self.assertEqual(format(DerivedFromSimple2(10), 'abcdef'), + '10abcdef') + + class_test(*classes_new()) + + def empty_format_spec(value): + # test that: + # format(x, '') == str(x) + # format(x) == str(x) + self.assertEqual(format(value, ""), str(value)) + self.assertEqual(format(value), str(value)) + + # for builtin types, format(x, "") == str(x) + empty_format_spec(17**13) + empty_format_spec(1.0) + empty_format_spec(3.1415e104) + 
empty_format_spec(-3.1415e104) + empty_format_spec(3.1415e-104) + empty_format_spec(-3.1415e-104) + empty_format_spec(object) + empty_format_spec(None) + + # TypeError because self.__format__ returns the wrong type + class BadFormatResult: + def __format__(self, format_spec): + return 1.0 + self.assertRaises(TypeError, format, BadFormatResult(), "") + + # TypeError because format_spec is not unicode or str + self.assertRaises(TypeError, format, object(), 4) + self.assertRaises(TypeError, format, object(), object()) + + # tests for object.__format__ really belong elsewhere, but + # there's no good place to put them + x = object().__format__('') + self.assertTrue(x.startswith('= 4: + if should_raise_warning: + self.assertRaises(TypeError, format, obj, fmt_str) + else: + try: + format(obj, fmt_str) + except TypeError: + self.fail('object.__format__ raised TypeError unexpectedly') + else: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always", DeprecationWarning) + format(obj, fmt_str) + # Was: + # if should_raise_warning: + # self.assertEqual(len(w), 1) + # self.assertIsInstance(w[0].message, DeprecationWarning) + # self.assertIn('object.__format__ with a non-empty format ' + # 'string', str(w[0].message)) + # else: + # self.assertEqual(len(w), 0) + # Py2.7 fails these tests + + fmt_strs = ['', 's'] + + class A: + def __format__(self, fmt_str): + return format('', fmt_str) + + for fmt_str in fmt_strs: + test_deprecated_format_string(A(), fmt_str, False) + + class B: + pass + + class C(object): + pass + + for cls in [object, B, C]: + for fmt_str in fmt_strs: + test_deprecated_format_string(cls(), fmt_str, len(fmt_str) != 0) + # -------------------------------------------------------------------- + + # make sure we can take a subclass of str as a format spec + class DerivedFromStr(str): pass + self.assertEqual(format(0, DerivedFromStr('10')), ' 0') + + def test_bin(self): + self.assertEqual(bin(0), '0b0') + self.assertEqual(bin(1), '0b1') + 
self.assertEqual(bin(-1), '-0b1') + self.assertEqual(bin(2**65), '0b1' + '0' * 65) + self.assertEqual(bin(2**65-1), '0b' + '1' * 65) + self.assertEqual(bin(-(2**65)), '-0b1' + '0' * 65) + self.assertEqual(bin(-(2**65-1)), '-0b' + '1' * 65) + + def test_bytearray_translate(self): + x = bytearray(b"abc") + self.assertRaises(ValueError, x.translate, b"1", 1) + self.assertRaises(TypeError, x.translate, b"1"*256, 1) + + def test_construct_singletons(self): + for const in None, Ellipsis, NotImplemented: + tp = type(const) + # Was: self.assertIs(tp(), const) + # Fails for Py2 + self.assertRaises(TypeError, tp, 1, 2) + self.assertRaises(TypeError, tp, a=1, b=2) + +class TestSorted(unittest.TestCase): + + def test_basic(self): + data = list(range(100)) + copy = data[:] + random.shuffle(copy) + self.assertEqual(data, sorted(copy)) + self.assertNotEqual(data, copy) + + data.reverse() + random.shuffle(copy) + self.assertEqual(data, sorted(copy, key=lambda x: -x)) + self.assertNotEqual(data, copy) + random.shuffle(copy) + self.assertEqual(data, sorted(copy, reverse=1)) + self.assertNotEqual(data, copy) + + def test_inputtypes(self): + s = 'abracadabra' + types = [list, tuple, str] + for T in types: + self.assertEqual(sorted(s), sorted(T(s))) + + s = ''.join(set(s)) # unique letters only + types = [str, set, frozenset, list, tuple, dict.fromkeys] + for T in types: + self.assertEqual(sorted(s), sorted(T(s))) + + def test_baddecorator(self): + data = 'The quick Brown fox Jumped over The lazy Dog'.split() + self.assertRaises(TypeError, sorted, data, None, lambda x,y: 0) + + + # def test_input(self, interpreter='python2'): + # """ + # Passes in a string to the waiting input() + # """ + # code = ''' + # from future.builtins import input + # def greet(name): + # print "Hello, {0}!".format(name) + # print "What's your name?" 
+ # name = input() + # greet(name) + # ''' + # with open(self.tempdir + 'input_test_script.py', 'w') as f: + # f.write(textwrap.dedent(code)) + # p1 = Popen([interpreter, 'input_test_script.py'], stdout=PIPE, stdin=PIPE, stderr=None) + # (stdout, stderr) = p1.communicate(b'Ed') + # # print(stdout) + # # print(stderr) + # self.assertEqual(stdout, b"What's your name?\nHello, Ed!\n") + + +if __name__ == '__main__': + unittest.main() diff --git a/future/tests/test_builtins_explicit_import.py b/tests/test_future/test_builtins_explicit_import.py similarity index 82% rename from future/tests/test_builtins_explicit_import.py rename to tests/test_future/test_builtins_explicit_import.py index 6ca1aa8e..24800c4b 100644 --- a/future/tests/test_builtins_explicit_import.py +++ b/tests/test_future/test_builtins_explicit_import.py @@ -5,8 +5,8 @@ from __future__ import absolute_import, division, unicode_literals from future.builtins import (filter, map, zip) -from future.builtins import (ascii, chr, hex, input, isinstance, oct, open) -from future.builtins import (bytes, int, range, round, str, super) +from future.builtins import (ascii, chr, hex, input, isinstance, next, oct, open) +from future.builtins import (bytes, dict, int, range, round, str, super) from future.tests.base import unittest diff --git a/tests/test_future/test_bytes.py b/tests/test_future/test_bytes.py new file mode 100644 index 00000000..b9b157d8 --- /dev/null +++ b/tests/test_future/test_bytes.py @@ -0,0 +1,786 @@ +# -*- coding: utf-8 -*- +""" +Tests for the backported bytes object +""" + +from __future__ import absolute_import, unicode_literals, print_function +from future.builtins import * +from future import utils + +from numbers import Integral +from future.tests.base import unittest, expectedFailurePY2 + + +TEST_UNICODE_STR = u'ℝεα∂@ßʟ℮ ☂ℯṧт υηḯ¢☺ḓ℮' +# Tk icon as a .gif: +TEST_BYTE_STR = 
b'GIF89a\x0e\x00\x0b\x00\x80\xff\x00\xff\x00\x00\xc0\xc0\xc0!\xf9\x04\x01\x00\x00\x01\x00,\x00\x00\x00\x00\x0e\x00\x0b\x00@\x02\x1f\x0c\x8e\x10\xbb\xcan\x90\x99\xaf&\xd8\x1a\xce\x9ar\x06F\xd7\xf1\x90\xa1c\x9e\xe8\x84\x99\x89\x97\xa2J\x01\x00;\x1a\x14\x00;;\xba\nD\x14\x00\x00;;' + + +class TestBytes(unittest.TestCase): + def test_bytes_encoding_arg(self): + """ + The bytes class has changed in Python 3 to accept an + additional argument in the constructor: encoding. + + It would be nice to support this without breaking the + isinstance(..., bytes) test below. + """ + u = u'Unicode string: \u5b54\u5b50' + b = bytes(u, encoding='utf-8') + self.assertEqual(b, u.encode('utf-8')) + + nu = str(u) + b = bytes(nu, encoding='utf-8') + self.assertEqual(b, u.encode('utf-8')) + + def test_bytes_encoding_arg_issue_193(self): + """ + This used to be True: bytes(str(u'abc'), 'utf8') == b"b'abc'" + """ + u = u'abc' + b = bytes(str(u), 'utf8') + self.assertNotEqual(b, b"b'abc'") + self.assertEqual(b, b'abc') + self.assertEqual(b, bytes(b'abc')) + + def test_bytes_encoding_arg_non_kwarg(self): + """ + As above, but with a positional argument + """ + u = u'Unicode string: \u5b54\u5b50' + b = bytes(u, 'utf-8') + self.assertEqual(b, u.encode('utf-8')) + + nu = str(u) + b = bytes(nu, 'utf-8') + self.assertEqual(b, u.encode('utf-8')) + + def test_bytes_string_no_encoding(self): + with self.assertRaises(TypeError): + bytes(u'ABC') + + def test_bytes_int(self): + """ + In Py3, bytes(int) -> bytes object of size given by the parameter initialized with null + """ + self.assertEqual(bytes(5), b'\x00\x00\x00\x00\x00') + # Test using newint: + self.assertEqual(bytes(int(5)), b'\x00\x00\x00\x00\x00') + self.assertTrue(isinstance(bytes(int(5)), bytes)) + + # Negative counts are not allowed in Py3: + with self.assertRaises(ValueError): + bytes(-1) + with self.assertRaises(ValueError): + bytes(int(-1)) + + @unittest.skipIf(utils.PY3, 'test not needed on Py3: all ints are long') + def 
test_bytes_long(self): + """ + As above, but explicitly feeding in a long on Py2. Note that + checks like: + isinstance(n, int) + are fragile on Py2, because isinstance(10L, int) is False. + """ + m = long(5) + n = long(-1) + self.assertEqual(bytes(m), b'\x00\x00\x00\x00\x00') + # Negative counts are not allowed in Py3: + with self.assertRaises(ValueError): + bytes(n) + + def test_bytes_empty(self): + """ + bytes() -> b'' + """ + self.assertEqual(bytes(), b'') + + def test_bytes_iterable_of_ints(self): + self.assertEqual(bytes([65, 66, 67]), b'ABC') + self.assertEqual(bytes([int(120), int(121), int(122)]), b'xyz') + + def test_bytes_bytes(self): + self.assertEqual(bytes(b'ABC'), b'ABC') + + def test_bytes_is_bytes(self): + b = bytes(b'ABC') + self.assertTrue(bytes(b) is b) + self.assertEqual(repr(bytes(b)), "b'ABC'") + + def test_bytes_fromhex(self): + self.assertEqual(bytes.fromhex('bb 0f'), b'\xbb\x0f') + self.assertEqual(bytes.fromhex('1234'), b'\x124') + self.assertEqual(bytes.fromhex('12ffa0'), b'\x12\xff\xa0') + b = b'My bytestring' + self.assertEqual(bytes(b).fromhex('bb 0f'), b'\xbb\x0f') + + def test_isinstance_bytes(self): + self.assertTrue(isinstance(bytes(b'blah'), bytes)) + + def test_isinstance_bytes_subclass(self): + """ + Issue #89 + """ + value = bytes(b'abc') + class Magic(bytes): + pass + self.assertTrue(isinstance(value, bytes)) + self.assertFalse(isinstance(value, Magic)) + + def test_isinstance_oldbytestrings_bytes(self): + """ + Watch out for this. Byte-strings produced in various places in Py2 + are of type 'str'. With 'from future.builtins import bytes', 'bytes' + is redefined to be a subclass of 'str', not just an alias for 'str'. 
+ """ + self.assertTrue(isinstance(b'blah', bytes)) # not with the redefined bytes obj + self.assertTrue(isinstance(u'blah'.encode('utf-8'), bytes)) # not with the redefined bytes obj + + def test_bytes_getitem(self): + b = bytes(b'ABCD') + self.assertEqual(b[0], 65) + self.assertEqual(b[-1], 68) + self.assertEqual(b[0:1], b'A') + self.assertEqual(b[:], b'ABCD') + + @expectedFailurePY2 + def test_b_literal_creates_newbytes_object(self): + """ + It would nice if the b'' literal syntax could be coaxed into producing + bytes objects somehow ... ;) + """ + b = b'ABCD' + self.assertTrue(isinstance(b, bytes)) + self.assertEqual(b[0], 65) + self.assertTrue(repr(b).startswith('b')) + + def test_repr(self): + b = bytes(b'ABCD') + self.assertTrue(repr(b).startswith('b')) + + def test_str(self): + b = bytes(b'ABCD') + self.assertEqual(str(b), "b'ABCD'") + + def test_bytes_setitem(self): + b = b'ABCD' + with self.assertRaises(TypeError): + b[0] = b'B' + + def test_bytes_iteration(self): + b = bytes(b'ABCD') + for item in b: + self.assertTrue(isinstance(item, Integral)) + self.assertEqual(list(b), [65, 66, 67, 68]) + + def test_bytes_plus_unicode_string(self): + b = bytes(b'ABCD') + u = u'EFGH' + with self.assertRaises(TypeError): + b + u + + with self.assertRaises(TypeError): + u + b + + def test_bytes_plus_bytes(self): + b1 = bytes(b'ABCD') + b2 = b1 + b1 + self.assertEqual(b2, b'ABCDABCD') + self.assertTrue(isinstance(b2, bytes)) + + b3 = b1 + b'ZYXW' + self.assertEqual(b3, b'ABCDZYXW') + self.assertTrue(isinstance(b3, bytes)) + + b4 = b'ZYXW' + b1 + self.assertEqual(b4, b'ZYXWABCD') + self.assertTrue(isinstance(b4, bytes)) + + def test_find_not_found(self): + self.assertEqual(-1, bytes(b'ABCDE').find(b':')) + + def test_find_found(self): + self.assertEqual(2, bytes(b'AB:CD:E').find(b':')) + + def test_rfind_not_found(self): + self.assertEqual(-1, bytes(b'ABCDE').rfind(b':')) + + def test_rfind_found(self): + self.assertEqual(5, bytes(b'AB:CD:E').rfind(b':')) + + def 
test_bytes_join_bytes(self): + b = bytes(b' * ') + strings = [b'AB', b'EFGH', b'IJKL'] + result = b.join(strings) + self.assertEqual(result, b'AB * EFGH * IJKL') + self.assertTrue(isinstance(result, bytes)) + + def test_bytes_join_others(self): + b = bytes(b' ') + with self.assertRaises(TypeError): + b.join([42]) + with self.assertRaises(TypeError): + b.join(b'blah') + with self.assertRaises(TypeError): + b.join(bytes(b'blah')) + + def test_bytes_join_unicode_strings(self): + b = bytes(b'ABCD') + strings = [u'EFGH', u'IJKL'] + with self.assertRaises(TypeError): + b.join(strings) + + def test_bytes_replace(self): + b = bytes(b'ABCD') + c = b.replace(b'A', b'F') + self.assertEqual(c, b'FBCD') + self.assertTrue(isinstance(c, bytes)) + + with self.assertRaises(TypeError): + b.replace(b'A', u'F') + with self.assertRaises(TypeError): + b.replace(u'A', b'F') + + def test_bytes_partition(self): + b1 = bytes(b'ABCD') + parts = b1.partition(b'B') + self.assertEqual(parts, (b'A', b'B', b'CD')) + self.assertTrue(all([isinstance(p, bytes) for p in parts])) + + b2 = bytes(b'ABCDABCD') + parts = b2.partition(b'B') + self.assertEqual(parts, (b'A', b'B', b'CDABCD')) + + def test_bytes_rpartition(self): + b2 = bytes(b'ABCDABCD') + parts = b2.rpartition(b'B') + self.assertEqual(parts, (b'ABCDA', b'B', b'CD')) + self.assertTrue(all([isinstance(p, bytes) for p in parts])) + + def test_bytes_contains_something(self): + b = bytes(b'ABCD') + self.assertTrue(b'A' in b) + self.assertTrue(65 in b) + + self.assertTrue(b'AB' in b) + self.assertTrue(bytes([65, 66]) in b) + + self.assertFalse(b'AC' in b) + self.assertFalse(bytes([65, 67]) in b) + + self.assertFalse(b'Z' in b) + self.assertFalse(99 in b) + + with self.assertRaises(TypeError): + u'A' in b + + def test_bytes_index(self): + b = bytes(b'ABCD') + self.assertEqual(b.index(b'B'), 1) + self.assertEqual(b.index(67), 2) + + def test_startswith(self): + b = bytes(b'abcd') + self.assertTrue(b.startswith(b'a')) + 
self.assertTrue(b.startswith((b'a', b'b'))) + self.assertTrue(b.startswith(bytes(b'ab'))) + self.assertFalse(b.startswith((b'A', b'B'))) + + with self.assertRaises(TypeError) as cm: + b.startswith(65) + with self.assertRaises(TypeError) as cm: + b.startswith([b'A']) + exc = str(cm.exception) + # self.assertIn('bytes', exc) + # self.assertIn('tuple', exc) + + def test_endswith(self): + b = bytes(b'abcd') + self.assertTrue(b.endswith(b'd')) + self.assertTrue(b.endswith((b'c', b'd'))) + self.assertTrue(b.endswith(bytes(b'cd'))) + self.assertFalse(b.endswith((b'A', b'B'))) + + with self.assertRaises(TypeError) as cm: + b.endswith(65) + with self.assertRaises(TypeError) as cm: + b.endswith([b'D']) + exc = str(cm.exception) + # self.assertIn('bytes', exc) + # self.assertIn('tuple', exc) + + def test_decode(self): + b = bytes(b'abcd') + s = b.decode('utf-8') + self.assertEqual(s, 'abcd') + self.assertTrue(isinstance(s, str)) + + def test_encode(self): + b = bytes(b'abcd') + with self.assertRaises(AttributeError) as cm: + b.encode('utf-8') + + def test_eq(self): + """ + Equals: == + """ + b = bytes(b'ABCD') + self.assertEqual(b, b'ABCD') + self.assertTrue(b == b'ABCD') + self.assertEqual(b'ABCD', b) + self.assertEqual(b, b) + self.assertFalse(b == b'ABC') + self.assertFalse(b == bytes(b'ABC')) + self.assertFalse(b == u'ABCD') + self.assertFalse(b == str('ABCD')) + # Fails: + # self.assertFalse(u'ABCD' == b) + self.assertFalse(str('ABCD') == b) + + self.assertFalse(b == list(b)) + self.assertFalse(b == str(b)) + self.assertFalse(b == u'ABC') + self.assertFalse(bytes(b'Z') == 90) + + def test_ne(self): + b = bytes(b'ABCD') + self.assertFalse(b != b) + self.assertFalse(b != b'ABCD') + self.assertTrue(b != b'ABCDEFG') + self.assertTrue(b != bytes(b'ABCDEFG')) + self.assertTrue(b'ABCDEFG' != b) + + # self.assertTrue(b'ABCD' != u'ABCD') + self.assertTrue(b != u'ABCD') + self.assertTrue(b != u'ABCDE') + self.assertTrue(bytes(b'') != str(u'')) + self.assertTrue(str(u'') != 
bytes(b'')) + + self.assertTrue(b != list(b)) + self.assertTrue(b != str(b)) + + def test_hash(self): + d = {} + b = bytes(b'ABCD') + native_b = b'ABCD' + s = str('ABCD') + native_s = u'ABCD' + d[b] = b + d[s] = s + self.assertEqual(len(d), 2) + # This should overwrite d[s] but not d[b]: + d[native_s] = native_s + self.assertEqual(len(d), 2) + # This should overwrite d[native_s] again: + d[s] = s + self.assertEqual(len(d), 2) + self.assertEqual(set(d.keys()), set([s, b])) + + @unittest.expectedFailure + def test_hash_with_native_types(self): + # Warning: initializing the dict with native Py2 types throws the + # hashing out: + d = {u'ABCD': u'ABCD', b'ABCD': b'ABCD'} + # On Py2: len(d) == 1 + b = bytes(b'ABCD') + s = str('ABCD') + d[s] = s + d[b] = b + # Fails: + self.assertEqual(len(d) > 1) + + def test_add(self): + b = bytes(b'ABC') + c = bytes(b'XYZ') + d = b + c + self.assertTrue(isinstance(d, bytes)) + self.assertEqual(d, b'ABCXYZ') + f = b + b'abc' + self.assertTrue(isinstance(f, bytes)) + self.assertEqual(f, b'ABCabc') + g = b'abc' + b + self.assertTrue(isinstance(g, bytes)) + self.assertEqual(g, b'abcABC') + + def test_cmp(self): + b = bytes(b'ABC') + with self.assertRaises(TypeError): + b > 3 + with self.assertRaises(TypeError): + b > u'XYZ' + with self.assertRaises(TypeError): + b <= 3 + with self.assertRaises(TypeError): + b >= int(3) + with self.assertRaises(TypeError): + b < 3.3 + with self.assertRaises(TypeError): + b > (3.3 + 3j) + with self.assertRaises(TypeError): + b >= (1, 2) + with self.assertRaises(TypeError): + b <= [1, 2] + + def test_mul(self): + b = bytes(b'ABC') + c = b * 4 + self.assertTrue(isinstance(c, bytes)) + self.assertEqual(c, b'ABCABCABCABC') + d = b * int(4) + self.assertTrue(isinstance(d, bytes)) + self.assertEqual(d, b'ABCABCABCABC') + if utils.PY2: + e = b * long(4) + self.assertTrue(isinstance(e, bytes)) + self.assertEqual(e, b'ABCABCABCABC') + + def test_rmul(self): + b = bytes(b'XYZ') + c = 3 * b + 
self.assertTrue(isinstance(c, bytes)) + self.assertEqual(c, b'XYZXYZXYZ') + d = b * int(3) + self.assertTrue(isinstance(d, bytes)) + self.assertEqual(d, b'XYZXYZXYZ') + if utils.PY2: + e = long(3) * b + self.assertTrue(isinstance(e, bytes)) + self.assertEqual(e, b'XYZXYZXYZ') + + def test_slice(self): + b = bytes(b'ABCD') + c1 = b[:] + self.assertTrue(isinstance(c1, bytes)) + self.assertTrue(c1 == b) + # The following is not true, whereas it is true normally on Py2 and + # Py3. Does this matter?: + # self.assertTrue(c1 is b) + + c2 = b[10:] + self.assertTrue(isinstance(c2, bytes)) + self.assertTrue(c2 == bytes(b'')) + self.assertTrue(c2 == b'') + + c3 = b[:0] + self.assertTrue(isinstance(c3, bytes)) + self.assertTrue(c3 == bytes(b'')) + self.assertTrue(c3 == b'') + + c4 = b[:1] + self.assertTrue(isinstance(c4, bytes)) + self.assertTrue(c4 == bytes(b'A')) + self.assertTrue(c4 == b'A') + + c5 = b[:-1] + self.assertTrue(isinstance(c5, bytes)) + self.assertTrue(c5 == bytes(b'ABC')) + self.assertTrue(c5 == b'ABC') + + def test_bytes_frozenset(self): + _ALWAYS_SAFE = bytes(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + b'abcdefghijklmnopqrstuvwxyz' + b'0123456789' + b'_.-') # from Py3.3's urllib.parse + s = frozenset(_ALWAYS_SAFE) + self.assertTrue(65 in s) + self.assertFalse(64 in s) + # Convert back to bytes + b1 = bytes(s) + self.assertTrue(65 in b1) + self.assertEqual(set(b1), set(_ALWAYS_SAFE)) + + def test_bytes_within_range(self): + """ + Python 3 does this: + >>> bytes([255, 254, 256]) + ValueError + ... + ValueError: bytes must be in range(0, 256) + + Ensure our bytes() constructor has the same behaviour + """ + b1 = bytes([254, 255]) + self.assertEqual(b1, b'\xfe\xff') + with self.assertRaises(ValueError): + b2 = bytes([254, 255, 256]) + + def test_bytes_hasattr_encode(self): + """ + This test tests whether hasattr(b, 'encode') is False, like it is on Py3. 
+ """ + b = bytes(b'abcd') + self.assertFalse(hasattr(b, 'encode')) + self.assertTrue(hasattr(b, 'decode')) + + def test_quote_from_bytes(self): + """ + This test was failing in the backported urllib.parse module in quote_from_bytes + """ + empty = bytes([]) + self.assertEqual(empty, b'') + self.assertTrue(type(empty), bytes) + + empty2 = bytes(()) + self.assertEqual(empty2, b'') + self.assertTrue(type(empty2), bytes) + + safe = bytes(u'Philosopher guy: 孔子. More text here.'.encode('utf-8')) + safe = bytes([c for c in safe if c < 128]) + self.assertEqual(safe, b'Philosopher guy: . More text here.') + self.assertTrue(type(safe), bytes) + + def test_rstrip(self): + b = bytes(b'abcd') + c = b.rstrip(b'd') + self.assertEqual(c, b'abc') + self.assertEqual(type(c), type(b)) + + def test_maketrans(self): + """ + Issue #51. + + Test is from Py3.3.5. + """ + transtable = b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff' + self.assertEqual(bytes.maketrans(b'', b''), transtable) + + transtable = b'\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 
!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377' + self.assertEqual(bytes.maketrans(b'abc', b'xyz'), transtable) + + transtable = b'\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374xyz' + self.assertEqual(bytes.maketrans(b'\375\376\377', b'xyz'), transtable) + self.assertRaises(ValueError, bytes.maketrans, b'abc', b'xyzq') + self.assertRaises(TypeError, bytes.maketrans, 'abc', 'def') + + @unittest.skipUnless(utils.PY2, 'test requires Python 2') + def test_mod_custom_dict(self): + import UserDict + + class MyDict(UserDict.UserDict): + pass + + d = MyDict() + d['foo'] = bytes(b'bar') + self.assertFalse(isinstance(d, dict)) + self.assertTrue(isinstance(d, UserDict.UserDict)) + + self.assertEqual(bytes(b'%(foo)s') % d, b'bar') + + 
@unittest.skipUnless(utils.PY35_PLUS or utils.PY2, + 'test requires Python 2 or 3.5+') + def test_mod_more(self): + self.assertEqual(b'%s' % b'aaa', b'aaa') + self.assertEqual(bytes(b'%s') % b'aaa', b'aaa') + self.assertEqual(bytes(b'%s') % bytes(b'aaa'), b'aaa') + + self.assertEqual(b'%s' % (b'aaa',), b'aaa') + self.assertEqual(bytes(b'%s') % (b'aaa',), b'aaa') + self.assertEqual(bytes(b'%s') % (bytes(b'aaa'),), b'aaa') + + self.assertEqual(bytes(b'%(x)s') % {b'x': b'aaa'}, b'aaa') + self.assertEqual(bytes(b'%(x)s') % {b'x': bytes(b'aaa')}, b'aaa') + + @unittest.skipUnless(utils.PY35_PLUS or utils.PY2, + 'test requires Python 2 or 3.5+') + def test_mod(self): + """ + From Py3.5 test suite (post-PEP 461). + + The bytes mod code is in _PyBytes_Format() in bytesobject.c in Py3.5. + """ + + # XXX Add support for %b! + # + # b = bytes(b'hello, %b!') + # orig = b + # b = b % b'world' + # self.assertEqual(b, b'hello, world!') + # self.assertEqual(orig, b'hello, %b!') + # self.assertFalse(b is orig) + + b = bytes(b'%s / 100 = %d%%') + a = b % (b'seventy-nine', 79) + self.assertEqual(a, b'seventy-nine / 100 = 79%') + + b = bytes(b'%s / 100 = %d%%') + a = b % (bytes(b'seventy-nine'), 79) + self.assertEqual(a, b'seventy-nine / 100 = 79%') + + @unittest.skipUnless(utils.PY35_PLUS or utils.PY2, + 'test requires Python 2 or 3.5+') + def test_imod(self): + """ + From Py3.5 test suite (post-PEP 461) + """ + # if (3, 0) <= sys.version_info[:2] < (3, 5): + # raise unittest.SkipTest('bytes % not yet implemented on Py3.0-3.4') + + # b = bytes(b'hello, %b!') + # orig = b + # b %= b'world' + # self.assertEqual(b, b'hello, world!') + # self.assertEqual(orig, b'hello, %b!') + # self.assertFalse(b is orig) + + b = bytes(b'%s / 100 = %d%%') + b %= (b'seventy-nine', 79) + self.assertEqual(b, b'seventy-nine / 100 = 79%') + + b = bytes(b'%s / 100 = %d%%') + b %= (bytes(b'seventy-nine'), 79) + self.assertEqual(b, b'seventy-nine / 100 = 79%') + + # def test_mod_pep_461(self): + # """ + # Test 
for the PEP 461 functionality (resurrection of %s formatting for + # bytes). + # """ + # b1 = bytes(b'abc%b') + # b2 = b1 % b'def' + # self.assertEqual(b2, b'abcdef') + # self.assertTrue(isinstance(b2, bytes)) + # self.assertEqual(type(b2), bytes) + # b3 = b1 % bytes(b'def') + # self.assertEqual(b3, b'abcdef') + # self.assertTrue(isinstance(b3, bytes)) + # self.assertEqual(type(b3), bytes) + # + # # %s is supported for backwards compatibility with Py2's str + # b4 = bytes(b'abc%s') + # b5 = b4 % b'def' + # self.assertEqual(b5, b'abcdef') + # self.assertTrue(isinstance(b5, bytes)) + # self.assertEqual(type(b5), bytes) + # b6 = b4 % bytes(b'def') + # self.assertEqual(b6, b'abcdef') + # self.assertTrue(isinstance(b6, bytes)) + # self.assertEqual(type(b6), bytes) + # + # self.assertEqual(bytes(b'%c') % 48, b'0') + # self.assertEqual(bytes(b'%c') % b'a', b'a') + # + # # For any numeric code %x, formatting of + # # b"%x" % val + # # is supposed to be equivalent to + # # ("%x" % val).encode("ascii") + # for code in b'xdiouxXeEfFgG': + # bytechar = bytes([code]) + # pct_str = u"%" + bytechar.decode('ascii') + # for val in range(300): + # self.assertEqual(bytes(b"%" + bytechar) % val, + # (pct_str % val).encode("ascii")) + # + # with self.assertRaises(TypeError): + # bytes(b'%b') % 3.14 + # # Traceback (most recent call last): + # # ... + # # TypeError: b'%b' does not accept 'float' + # + # with self.assertRaises(TypeError): + # bytes(b'%b') % 'hello world!' + # # Traceback (most recent call last): + # # ... 
+ # # TypeError: b'%b' does not accept 'str' + # + # self.assertEqual(bytes(b'%a') % 3.14, b'3.14') + # + # self.assertEqual(bytes(b'%a') % b'abc', b"b'abc'") + # self.assertEqual(bytes(b'%a') % bytes(b'abc'), b"b'abc'") + # + # self.assertEqual(bytes(b'%a') % 'def', b"'def'") + # + # # PEP 461 was updated after an Py3.5 alpha release to specify that %r is now supported + # # for compatibility: http://legacy.python.org/dev/peps/pep-0461/#id16 + # assert bytes(b'%r' % b'abc') == bytes(b'%a' % b'abc') + # + # # with self.assertRaises(TypeError): + # # bytes(b'%r' % 'abc') + + @expectedFailurePY2 + def test_multiple_inheritance(self): + """ + Issue #96 (for newbytes instead of newobject) + """ + if utils.PY2: + from collections import Container + else: + from collections.abc import Container + + class Base(bytes): + pass + + class Foo(Base, Container): + def __contains__(self, item): + return False + + @expectedFailurePY2 + def test_with_metaclass_and_bytes(self): + """ + Issue #91 (for newdict instead of newobject) + """ + from future.utils import with_metaclass + + class MetaClass(type): + pass + + class TestClass(with_metaclass(MetaClass, bytes)): + pass + + def test_surrogateescape_decoding(self): + """ + Tests whether surrogateescape decoding works correctly. 
+ """ + pairs = [(u'\udcc3', b'\xc3'), + (u'\udcff', b'\xff')] + + for (s, b) in pairs: + decoded = bytes(b).decode('utf-8', 'surrogateescape') + self.assertEqual(s, decoded) + self.assertTrue(isinstance(decoded, str)) + self.assertEqual(b, decoded.encode('utf-8', 'surrogateescape')) + + def test_issue_171_part_a(self): + b1 = str(u'abc \u0123 do re mi').encode(u'utf_8') + b2 = bytes(u'abc \u0123 do re mi', u'utf_8') + b3 = bytes(str(u'abc \u0123 do re mi'), u'utf_8') + + @expectedFailurePY2 + def test_issue_171_part_b(self): + """ + Tests whether: + >>> nativebytes = bytes ; nativestr = str ; from builtins import * + >>> nativebytes(bytes(b'asdf'))[0] == b'a' == b'asdf' + """ + nativebytes = type(b'') + nativestr = type('') + b = nativebytes(bytes(b'asdf')) + self.assertEqual(b, b'asdf') + + def test_cast_to_bytes(self): + """ + Tests whether __bytes__ method is called + """ + + class TestObject: + def __bytes__(self): + return b'asdf' + + self.assertEqual(bytes(TestObject()), b'asdf') + + def test_cast_to_bytes_iter_precedence(self): + """ + Tests that call to __bytes__ is preferred to iteration + """ + + class TestObject: + def __bytes__(self): + return b'asdf' + + def __iter__(self): + return iter(b'hjkl') + + self.assertEqual(bytes(TestObject()), b'asdf') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_future/test_chainmap.py b/tests/test_future/test_chainmap.py new file mode 100644 index 00000000..2440401b --- /dev/null +++ b/tests/test_future/test_chainmap.py @@ -0,0 +1,160 @@ +""" +Tests for the future.standard_library module +""" + +from __future__ import absolute_import, print_function +from future import standard_library +from future import utils +from future.tests.base import unittest, CodeHandler, expectedFailurePY2 + +import sys +import tempfile +import os +import copy +import textwrap +from subprocess import CalledProcessError + + +class TestChainMap(CodeHandler): + + def setUp(self): + self.interpreter = sys.executable + 
standard_library.install_aliases() + super(TestChainMap, self).setUp() + + def tearDown(self): + # standard_library.remove_hooks() + pass + + @staticmethod + def simple_cm(): + from collections import ChainMap + c = ChainMap() + c['one'] = 1 + c['two'] = 2 + + cc = c.new_child() + cc['one'] = 'one' + + return c, cc + + + def test_repr(self): + c, cc = TestChainMap.simple_cm() + + order1 = "ChainMap({'one': 'one'}, {'one': 1, 'two': 2})" + order2 = "ChainMap({'one': 'one'}, {'two': 2, 'one': 1})" + assert repr(cc) in [order1, order2] + + + def test_recursive_repr(self): + """ + Test for degnerative recursive cases. Very unlikely in + ChainMaps. But all must bow before the god of testing coverage. + """ + from collections import ChainMap + c = ChainMap() + c['one'] = c + assert repr(c) == "ChainMap({'one': ...})" + + + def test_get(self): + c, cc = TestChainMap.simple_cm() + + assert cc.get('two') == 2 + assert cc.get('three') == None + assert cc.get('three', 'notthree') == 'notthree' + + + def test_bool(self): + from collections import ChainMap + c = ChainMap() + assert not(bool(c)) + + c['one'] = 1 + c['two'] = 2 + assert bool(c) + + cc = c.new_child() + cc['one'] = 'one' + assert cc + + + def test_fromkeys(self): + from collections import ChainMap + keys = 'a b c'.split() + c = ChainMap.fromkeys(keys) + assert len(c) == 3 + assert c['a'] == None + assert c['b'] == None + assert c['c'] == None + + + def test_copy(self): + c, cc = TestChainMap.simple_cm() + new_cc = cc.copy() + assert new_cc is not cc + assert sorted(new_cc.items()) == sorted(cc.items()) + + + def test_parents(self): + c, cc = TestChainMap.simple_cm() + + new_c = cc.parents + assert c is not new_c + assert len(new_c) == 2 + assert new_c['one'] == c['one'] + assert new_c['two'] == c['two'] + + + def test_delitem(self): + c, cc = TestChainMap.simple_cm() + + with self.assertRaises(KeyError): + del cc['two'] + + del cc['one'] + assert len(cc) == 2 + assert cc['one'] == 1 + assert cc['two'] == 2 + + + 
def test_popitem(self): + c, cc = TestChainMap.simple_cm() + + assert cc.popitem() == ('one', 'one') + + with self.assertRaises(KeyError): + cc.popitem() + + + def test_pop(self): + c, cc = TestChainMap.simple_cm() + + assert cc.pop('one') == 'one' + + with self.assertRaises(KeyError): + cc.pop('two') + + assert len(cc) == 2 + + + def test_clear(self): + c, cc = TestChainMap.simple_cm() + + cc.clear() + assert len(cc) == 2 + assert cc['one'] == 1 + assert cc['two'] == 2 + + + def test_missing(self): + + c, cc = TestChainMap.simple_cm() + + with self.assertRaises(KeyError): + cc['clown'] + + +if __name__ == '__main__': + unittest.main() diff --git a/future/tests/test_common_iterators.py b/tests/test_future/test_common_iterators.py similarity index 81% rename from future/tests/test_common_iterators.py rename to tests/test_future/test_common_iterators.py index 73cd5bae..d274c23a 100644 --- a/future/tests/test_common_iterators.py +++ b/tests/test_future/test_common_iterators.py @@ -10,6 +10,10 @@ def test_range(self): self.assertEqual(sum(range(10)), 45) self.assertTrue(9 in range(10)) self.assertEqual(list(range(5)), [0, 1, 2, 3, 4]) + self.assertEqual(repr(range(10)), 'range(0, 10)') + self.assertEqual(repr(range(1, 10)), 'range(1, 10)') + self.assertEqual(repr(range(1, 1)), 'range(1, 1)') + self.assertEqual(repr(range(-10, 10, 2)), 'range(-10, 10, 2)') def test_map(self): def square(x): diff --git a/tests/test_future/test_count.py b/tests/test_future/test_count.py new file mode 100644 index 00000000..cc849bd5 --- /dev/null +++ b/tests/test_future/test_count.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +""" +Tests for the backported class:`range` class. 
+""" +from itertools import count as it_count + +from future.backports.misc import count +from future.tests.base import unittest, skip26 + + +class CountTest(unittest.TestCase): + + """Test the count function.""" + + def _test_count_func(self, func): + self.assertEqual(next(func(1)), 1) + self.assertEqual(next(func(start=1)), 1) + + c = func() + self.assertEqual(next(c), 0) + self.assertEqual(next(c), 1) + self.assertEqual(next(c), 2) + c = func(1, 1) + self.assertEqual(next(c), 1) + self.assertEqual(next(c), 2) + c = func(step=1) + self.assertEqual(next(c), 0) + self.assertEqual(next(c), 1) + c = func(start=1, step=1) + self.assertEqual(next(c), 1) + self.assertEqual(next(c), 2) + + c = func(-1) + self.assertEqual(next(c), -1) + self.assertEqual(next(c), 0) + self.assertEqual(next(c), 1) + c = func(1, -1) + self.assertEqual(next(c), 1) + self.assertEqual(next(c), 0) + self.assertEqual(next(c), -1) + c = func(-1, -1) + self.assertEqual(next(c), -1) + self.assertEqual(next(c), -2) + self.assertEqual(next(c), -3) + + def test_count(self): + """Test the count function.""" + self._test_count_func(count) + + @skip26 + def test_own_count(self): + """Test own count implementation.""" + self._test_count_func(it_count) + + +if __name__ == '__main__': + unittest.main() diff --git a/future/tests/test_decorators.py b/tests/test_future/test_decorators.py similarity index 91% rename from future/tests/test_decorators.py rename to tests/test_future/test_decorators.py index b586d972..9ec2bb37 100644 --- a/future/tests/test_decorators.py +++ b/tests/test_future/test_decorators.py @@ -23,10 +23,6 @@ def __str__(self): if not utils.PY3: assert hasattr(a, '__unicode__') self.assertEqual(str(a), my_unicode_str) - - @unittest.expectedFailure - def test_str_encode_returns_bytes(self): - " The following fails but should ideally pass: " self.assertTrue(isinstance(str(a).encode('utf-8'), bytes)) # Manual equivalent on Py2 without the decorator: @@ -40,14 +36,14 @@ def __str__(self): assert 
str(a) == str(b) def test_implements_iterator(self): - + @implements_iterator class MyIter(object): def __next__(self): return 'Next!' def __iter__(self): return self - + itr = MyIter() self.assertEqual(next(itr), 'Next!') @@ -57,3 +53,5 @@ def __iter__(self): break self.assertEqual(item, 'Next!') +if __name__ == '__main__': + unittest.main() diff --git a/future/tests/test_dict.py b/tests/test_future/test_dict.py similarity index 65% rename from future/tests/test_dict.py rename to tests/test_future/test_dict.py index 599d4a57..ff9dd4ab 100644 --- a/future/tests/test_dict.py +++ b/tests/test_future/test_dict.py @@ -6,9 +6,10 @@ from __future__ import absolute_import, unicode_literals, print_function from future.builtins import * from future import utils -from future.tests.base import unittest +from future.tests.base import unittest, expectedFailurePY2 import os +import sys class TestDict(unittest.TestCase): def setUp(self): @@ -21,6 +22,17 @@ def test_dict_empty(self): """ self.assertEqual(dict(), {}) + def test_dict_dict(self): + """ + Exrapolated from issue #50 -- newlist(newlist([...])) + """ + d = dict({1: 2, 2: 4, 3: 9}) + d2 = dict(d) + self.assertEqual(len(d2), 3) + self.assertEqual(d2, d) + self.assertTrue(isinstance(d2, dict)) + self.assertTrue(type(d2) == dict) + def test_dict_eq(self): d = self.d1 self.assertEqual(dict(d), d) @@ -52,6 +64,16 @@ def test_isinstance_dict(self): d = self.d1 self.assertTrue(isinstance(d, dict)) + def test_isinstance_dict_subclass(self): + """ + Issue #89 + """ + value = dict() + class Magic(dict): + pass + self.assertTrue(isinstance(value, dict)) + self.assertFalse(isinstance(value, Magic)) + def test_dict_getitem(self): d = dict({'C': 1, 'B': 2, 'A': 3}) self.assertEqual(d['C'], 1) @@ -75,7 +97,7 @@ def test_set_like_behaviour(self): assert isinstance(d1.values() | d2.keys(), set) assert isinstance(d1.items() | d2.items(), set) - @unittest.expectedFailure + @expectedFailurePY2 def test_braces_create_newdict_object(self): """ 
It would nice if the {} dict syntax could be coaxed @@ -84,6 +106,37 @@ def test_braces_create_newdict_object(self): d = self.d1 self.assertTrue(type(d) == dict) + @expectedFailurePY2 + def test_multiple_inheritance(self): + """ + Issue #96 (for newdict instead of newobject) + """ + if utils.PY2: + from collections import Container + else: + from collections.abc import Container + + class Base(dict): + pass + + class Foo(Base, Container): + def __contains__(self, item): + return False + + @expectedFailurePY2 + def test_with_metaclass_and_dict(self): + """ + Issue #91 (for newdict instead of newobject) + """ + from future.utils import with_metaclass + + class MetaClass(type): + pass + + class TestClass(with_metaclass(MetaClass, dict)): + pass + + if __name__ == '__main__': unittest.main() diff --git a/tests/test_future/test_email_generation.py b/tests/test_future/test_email_generation.py new file mode 100644 index 00000000..10e61138 --- /dev/null +++ b/tests/test_future/test_email_generation.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +"""Tests for email generation.""" + +from __future__ import unicode_literals + +from future.backports.email.mime.multipart import MIMEMultipart +from future.backports.email.mime.text import MIMEText +from future.backports.email.utils import formatdate +from future.tests.base import unittest + + +class EmailGenerationTests(unittest.TestCase): + def test_email_custom_header_can_contain_unicode(self): + msg = MIMEMultipart() + alternative = MIMEMultipart('alternative') + alternative.attach(MIMEText('Plain content with Únicødê', _subtype='plain', _charset='utf-8')) + alternative.attach(MIMEText('HTML content with Únicødê', _subtype='html', _charset='utf-8')) + msg.attach(alternative) + + msg['Subject'] = 'Subject with Únicødê' + msg['From'] = 'sender@test.com' + msg['To'] = 'recipient@test.com' + msg['Date'] = formatdate(None, localtime=True) + msg['Message-ID'] = 'anIdWithÚnicødêForThisEmail' + + msg_lines = msg.as_string().split('\n') 
+ self.assertEqual(msg_lines[2], 'Subject: =?utf-8?b?U3ViamVjdCB3aXRoIMOabmljw7hkw6o=?=') + self.assertEqual(msg_lines[6], 'Message-ID: =?utf-8?b?YW5JZFdpdGjDmm5pY8O4ZMOqRm9yVGhpc0VtYWls?=') + self.assertEqual(msg_lines[17], 'UGxhaW4gY29udGVudCB3aXRoIMOabmljw7hkw6o=') + self.assertEqual(msg_lines[24], 'SFRNTCBjb250ZW50IHdpdGggw5puaWPDuGTDqg==') diff --git a/tests/test_future/test_email_multipart.py b/tests/test_future/test_email_multipart.py new file mode 100644 index 00000000..cbd93b89 --- /dev/null +++ b/tests/test_future/test_email_multipart.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +"""Tests for multipart emails.""" + +from future.tests.base import unittest +import future.backports.email as email +import future.backports.email.mime.multipart +from future.builtins import list + +class EmailMultiPartTests(unittest.TestCase): + """Tests for handling multipart email Messages.""" + + def test_multipart_serialize_without_boundary(self): + """Tests that serializing an empty multipart email does not fail.""" + multipart_message = email.mime.multipart.MIMEMultipart() + self.assertIsNot(multipart_message.as_string(), None) + + def test_multipart_set_boundary_does_not_change_header_type(self): + """ + Tests that Message.set_boundary() does not cause Python2 errors. + + In particular, tests that set_boundary does not cause the type of the + message headers list to be changed from the future built-in list. 
+ """ + multipart_message = email.mime.multipart.MIMEMultipart() + headers_type = type(multipart_message._headers) + self.assertEqual(headers_type, type(list())) + + boundary = '===============6387699881409002085==' + multipart_message.set_boundary(boundary) + headers_type = type(multipart_message._headers) + self.assertEqual(headers_type, type(list())) diff --git a/future/tests/test_explicit_imports.py b/tests/test_future/test_explicit_imports.py similarity index 100% rename from future/tests/test_explicit_imports.py rename to tests/test_future/test_explicit_imports.py diff --git a/tests/test_future/test_futurize.py b/tests/test_future/test_futurize.py new file mode 100644 index 00000000..c3696a54 --- /dev/null +++ b/tests/test_future/test_futurize.py @@ -0,0 +1,1433 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function + +import pprint +import tempfile +from subprocess import Popen, PIPE +import os + +from libfuturize.fixer_util import is_shebang_comment, is_encoding_comment +from lib2to3.fixer_util import FromImport +from lib2to3.pytree import Leaf, Node +from lib2to3.pygram import token + +from future.tests.base import (CodeHandler, unittest, skip26, reformat_code, + order_future_lines, expectedFailurePY26) +from future.utils import PY2 + + +class TestLibFuturize(unittest.TestCase): + + def setUp(self): + # For tests that need a text file: + _, self.textfilename = tempfile.mkstemp(text=True) + super(TestLibFuturize, self).setUp() + + def tearDown(self): + os.unlink(self.textfilename) + + def test_correct_exit_status(self): + """ + Issue #119: futurize and pasteurize were not exiting with the correct + status code. This is because the status code returned from + libfuturize.main.main() etc. was a ``newint``, which sys.exit() always + translates into 1! + """ + from libfuturize.main import main + retcode = main([self.textfilename]) + self.assertTrue(isinstance(retcode, int)) # i.e. 
Py2 builtin int + + def test_is_shebang_comment(self): + """ + Tests whether the fixer_util.is_encoding_comment() function is working. + """ + shebang_comments = [u'#!/usr/bin/env python\n' + u"#!/usr/bin/python2\n", + u"#! /usr/bin/python3\n", + ] + not_shebang_comments = [u"# I saw a giant python\n", + u"# I have never seen a python2\n", + ] + for comment in shebang_comments: + node = FromImport(u'math', [Leaf(token.NAME, u'cos', prefix=" ")]) + node.prefix = comment + self.assertTrue(is_shebang_comment(node)) + + for comment in not_shebang_comments: + node = FromImport(u'math', [Leaf(token.NAME, u'cos', prefix=" ")]) + node.prefix = comment + self.assertFalse(is_shebang_comment(node)) + + + def test_is_encoding_comment(self): + """ + Tests whether the fixer_util.is_encoding_comment() function is working. + """ + encoding_comments = [u"# coding: utf-8", + u"# encoding: utf-8", + u"# -*- coding: latin-1 -*-", + u"# vim: set fileencoding=iso-8859-15 :", + ] + not_encoding_comments = [u"# We use the file encoding utf-8", + u"coding = 'utf-8'", + u"encoding = 'utf-8'", + ] + for comment in encoding_comments: + node = FromImport(u'math', [Leaf(token.NAME, u'cos', prefix=" ")]) + node.prefix = comment + self.assertTrue(is_encoding_comment(node)) + + for comment in not_encoding_comments: + node = FromImport(u'math', [Leaf(token.NAME, u'cos', prefix=" ")]) + node.prefix = comment + self.assertFalse(is_encoding_comment(node)) + + +class TestFuturizeSimple(CodeHandler): + """ + This class contains snippets of Python 2 code (invalid Python 3) and + tests for whether they can be passed to ``futurize`` and immediately + run under both Python 2 again and Python 3. + """ + + def test_encoding_comments_kept_at_top(self): + """ + Issues #10 and #97: If there is a source encoding comment line + (PEP 263), is it kept at the top of a module by ``futurize``? 
+ """ + before = """ + # coding=utf-8 + + print 'Hello' + """ + after = """ + # coding=utf-8 + + from __future__ import print_function + print('Hello') + """ + self.convert_check(before, after) + + before = """ + #!/usr/bin/env python + # -*- coding: latin-1 -*-" + + print 'Hello' + """ + after = """ + #!/usr/bin/env python + # -*- coding: latin-1 -*-" + + from __future__ import print_function + print('Hello') + """ + self.convert_check(before, after) + + def test_multiline_future_import(self): + """ + Issue #113: don't crash if a future import has multiple lines + """ + text = """ + from __future__ import ( + division + ) + """ + self.convert(text) + + def test_shebang_blank_with_future_division_import(self): + """ + Issue #43: Is shebang line preserved as the first + line by futurize when followed by a blank line? + """ + before = """ + #!/usr/bin/env python + + import math + 1 / 5 + """ + after = """ + #!/usr/bin/env python + + from __future__ import division + from past.utils import old_div + import math + old_div(1, 5) + """ + self.convert_check(before, after) + + def test_shebang_blank_with_print_import(self): + before = """ + #!/usr/bin/env python + + import math + print 'Hello' + """ + after = """ + #!/usr/bin/env python + from __future__ import print_function + + import math + print('Hello') + """ + self.convert_check(before, after) + + def test_shebang_comment(self): + """ + Issue #43: Is shebang line preserved as the first + line by futurize when followed by a comment? + """ + before = """ + #!/usr/bin/env python + # some comments + # and more comments + + import math + print 'Hello!' + """ + after = """ + #!/usr/bin/env python + # some comments + # and more comments + from __future__ import print_function + + import math + print('Hello!') + """ + self.convert_check(before, after) + + def test_shebang_docstring(self): + """ + Issue #43: Is shebang line preserved as the first + line by futurize when followed by a docstring? 
+ """ + before = ''' + #!/usr/bin/env python + """ + a doc string + """ + import math + print 'Hello!' + ''' + after = ''' + #!/usr/bin/env python + """ + a doc string + """ + from __future__ import print_function + import math + print('Hello!') + ''' + self.convert_check(before, after) + + def test_oldstyle_classes(self): + """ + Stage 2 should convert old-style to new-style classes. This makes + the new-style class explicit and reduces the gap between the + behaviour (e.g. method resolution order) on Py2 and Py3. It also + allows us to provide ``newobject`` (see + test_oldstyle_classes_iterator). + """ + before = """ + class Blah: + pass + """ + after = """ + from builtins import object + class Blah(object): + pass + """ + self.convert_check(before, after, ignore_imports=False) + + def test_oldstyle_classes_iterator(self): + """ + An old-style class used as an iterator should be converted + properly. This requires ``futurize`` to do both steps (adding + inheritance from object and adding the newobject import) in the + right order. Any next() method should also be renamed to __next__. 
+ """ + before = """ + class Upper: + def __init__(self, iterable): + self._iter = iter(iterable) + def next(self): + return next(self._iter).upper() + def __iter__(self): + return self + + assert list(Upper('hello')) == list('HELLO') + """ + after = """ + from builtins import next + from builtins import object + class Upper(object): + def __init__(self, iterable): + self._iter = iter(iterable) + def __next__(self): + return next(self._iter).upper() + def __iter__(self): + return self + + assert list(Upper('hello')) == list('HELLO') + """ + self.convert_check(before, after, ignore_imports=False) + + # Try it again with this convention: class Upper(): + before2 = """ + class Upper(): + def __init__(self, iterable): + self._iter = iter(iterable) + def next(self): + return next(self._iter).upper() + def __iter__(self): + return self + + assert list(Upper('hello')) == list('HELLO') + """ + self.convert_check(before2, after) + + @unittest.expectedFailure + def test_problematic_string(self): + """ This string generates a SyntaxError on Python 3 unless it has + an r prefix. + """ + before = r""" + s = 'The folder is "C:\Users"'. + """ + after = r""" + s = r'The folder is "C:\Users"'. + """ + self.convert_check(before, after) + + @unittest.skip('--tobytes feature removed for now ...') + def test_tobytes(self): + """ + The --tobytes option converts all UNADORNED string literals 'abcd' to b'abcd'. + It does apply to multi-line strings but doesn't apply if it's a raw + string, because ur'abcd' is a SyntaxError on Python 2 and br'abcd' is a + SyntaxError on Python 3. 
+ """ + before = r""" + s0 = '1234' + s1 = '''5678 + ''' + s2 = "9abc" + # Unchanged: + s3 = r'1234' + s4 = R"defg" + s5 = u'hijk' + s6 = u"lmno" + s7 = b'lmno' + s8 = b"pqrs" + """ + after = r""" + s0 = b'1234' + s1 = b'''5678 + ''' + s2 = b"9abc" + # Unchanged: + s3 = r'1234' + s4 = R"defg" + s5 = u'hijk' + s6 = u"lmno" + s7 = b'lmno' + s8 = b"pqrs" + """ + self.convert_check(before, after, tobytes=True) + + def test_cmp(self): + before = """ + assert cmp(1, 2) == -1 + assert cmp(2, 1) == 1 + """ + after = """ + from past.builtins import cmp + assert cmp(1, 2) == -1 + assert cmp(2, 1) == 1 + """ + self.convert_check(before, after, stages=(1, 2), ignore_imports=False) + + def test_execfile(self): + before = """ + with open('mytempfile.py', 'w') as f: + f.write('x = 1') + execfile('mytempfile.py') + x += 1 + assert x == 2 + """ + after = """ + from past.builtins import execfile + with open('mytempfile.py', 'w') as f: + f.write('x = 1') + execfile('mytempfile.py') + x += 1 + assert x == 2 + """ + self.convert_check(before, after, stages=(1, 2), ignore_imports=False) + + @unittest.expectedFailure + def test_izip(self): + before = """ + from itertools import izip + for (a, b) in izip([1, 3, 5], [2, 4, 6]): + pass + """ + after = """ + from builtins import zip + for (a, b) in zip([1, 3, 5], [2, 4, 6]): + pass + """ + self.convert_check(before, after, stages=(1, 2), ignore_imports=False) + + def test_UserList(self): + before = """ + from UserList import UserList + a = UserList([1, 3, 5]) + assert len(a) == 3 + """ + after = """ + from collections import UserList + a = UserList([1, 3, 5]) + assert len(a) == 3 + """ + self.convert_check(before, after, stages=(1, 2), ignore_imports=True) + + @unittest.expectedFailure + def test_no_unneeded_list_calls(self): + """ + TODO: get this working + """ + code = """ + for (a, b) in zip(range(3), range(3, 6)): + pass + """ + self.unchanged(code) + + @expectedFailurePY26 + def test_import_builtins(self): + before = """ + a = 
raw_input() + b = open(a, b, c) + c = filter(a, b) + d = map(a, b) + e = isinstance(a, str) + f = bytes(a, encoding='utf-8') + for g in xrange(10**10): + pass + h = reduce(lambda x, y: x+y, [1, 2, 3, 4, 5]) + super(MyClass, self) + """ + after = """ + from builtins import bytes + from builtins import filter + from builtins import input + from builtins import map + from builtins import range + from functools import reduce + a = input() + b = open(a, b, c) + c = list(filter(a, b)) + d = list(map(a, b)) + e = isinstance(a, str) + f = bytes(a, encoding='utf-8') + for g in range(10**10): + pass + h = reduce(lambda x, y: x+y, [1, 2, 3, 4, 5]) + super(MyClass, self) + """ + self.convert_check(before, after, ignore_imports=False, run=False) + + @expectedFailurePY26 + def test_input_without_import(self): + before = """ + a = input() + """ + after = """ + from builtins import input + a = eval(input()) + """ + self.convert_check(before, after, ignore_imports=False, run=False) + + def test_input_with_import(self): + before = """ + from builtins import input + a = input() + """ + after = """ + from builtins import input + a = input() + """ + self.convert_check(before, after, ignore_imports=False, run=False) + + def test_xrange(self): + """ + The ``from builtins import range`` line was being added to the + bottom of the file as of v0.11.4, but only using Py2.7's lib2to3. + (Py3.3's lib2to3 seems to work.) + """ + before = """ + for i in xrange(10): + pass + """ + after = """ + from builtins import range + for i in range(10): + pass + """ + self.convert_check(before, after, ignore_imports=False) + + def test_source_coding_utf8(self): + """ + Tests to ensure that the source coding line is not corrupted or + removed. It must be left as the first line in the file (including + before any __future__ imports). Also tests whether the unicode + characters in this encoding are parsed correctly and left alone. 
+ """ + code = """ + # -*- coding: utf-8 -*- + icons = [u"◐", u"◓", u"◑", u"◒"] + """ + + def test_exception_syntax(self): + """ + Test of whether futurize handles the old-style exception syntax + """ + before = """ + try: + pass + except IOError, e: + val = e.errno + """ + after = """ + try: + pass + except IOError as e: + val = e.errno + """ + self.convert_check(before, after) + + def test_super(self): + """ + This tests whether futurize keeps the old two-argument super() calls the + same as before. It should, because this still works in Py3. + """ + code = ''' + class VerboseList(list): + def append(self, item): + print('Adding an item') + super(VerboseList, self).append(item) + ''' + self.unchanged(code) + + @unittest.expectedFailure + def test_file(self): + """ + file() as a synonym for open() is obsolete and invalid on Python 3. + """ + before = ''' + f = file(self.textfilename) + data = f.read() + f.close() + ''' + after = ''' + f = open(__file__) + data = f.read() + f.close() + ''' + self.convert_check(before, after) + + def test_apply(self): + before = ''' + def addup(*x): + return sum(x) + + assert apply(addup, (10,20)) == 30 + ''' + after = """ + def addup(*x): + return sum(x) + + assert addup(*(10,20)) == 30 + """ + self.convert_check(before, after) + + @unittest.skip('not implemented yet') + def test_download_pypi_package_and_test(self): + URL = 'http://pypi.python.org/pypi/{0}/json' + + import requests + package = 'future' + r = requests.get(URL.format(package)) + pprint.pprint(r.json()) + + download_url = r.json()['urls'][0]['url'] + filename = r.json()['urls'][0]['filename'] + # r2 = requests.get(download_url) + # with open('/tmp/' + filename, 'w') as tarball: + # tarball.write(r2.content) + + @expectedFailurePY26 + def test_raw_input(self): + """ + Passes in a string to the waiting input() after futurize + conversion. 
+ + The code is the first snippet from these docs: + http://docs.python.org/2/library/2to3.html + """ + before = """ + from io import BytesIO + def greet(name): + print "Hello, {0}!".format(name) + print "What's your name?" + import sys + oldstdin = sys.stdin + + sys.stdin = BytesIO(b'Ed\\n') + name = raw_input() + greet(name.decode()) + + sys.stdin = oldstdin + assert name == b'Ed' + """ + desired = """ + from io import BytesIO + def greet(name): + print("Hello, {0}!".format(name)) + print("What's your name?") + import sys + oldstdin = sys.stdin + + sys.stdin = BytesIO(b'Ed\\n') + name = input() + greet(name.decode()) + + sys.stdin = oldstdin + assert name == b'Ed' + """ + self.convert_check(before, desired, run=False) + + for interpreter in self.interpreters: + p1 = Popen([interpreter, self.tempdir + 'mytestscript.py'], + stdout=PIPE, stdin=PIPE, stderr=PIPE) + (stdout, stderr) = p1.communicate(b'Ed') + self.assertEqual(stderr, b'') + self.assertEqual(stdout, b"What's your name?\nHello, Ed!\n") + + def test_literal_prefixes_are_not_stripped(self): + """ + Tests to ensure that the u'' and b'' prefixes on unicode strings and + byte strings are not removed by the futurize script. Removing the + prefixes on Py3.3+ is unnecessary and loses some information -- namely, + that the strings have explicitly been marked as unicode or bytes, + rather than just e.g. a guess by some automated tool about what they + are. + """ + code = ''' + s = u'unicode string' + b = b'byte string' + ''' + self.unchanged(code) + + def test_division(self): + before = """ + x = 1 / 2 + """ + after = """ + from past.utils import old_div + x = old_div(1, 2) + """ + self.convert_check(before, after, stages=[1, 2]) + + def test_already_future_division(self): + code = """ + from __future__ import division + x = 1 / 2 + assert x == 0.5 + y = 3. / 2. 
+ assert y == 1.5 + """ + self.unchanged(code) + + +class TestFuturizeRenamedStdlib(CodeHandler): + @unittest.skip('Infinite loop?') + def test_renamed_modules(self): + before = """ + import ConfigParser + import copy_reg + import cPickle + import cStringIO + """ + after = """ + import configparser + import copyreg + import pickle + import io + """ + # We can't run the converted code because configparser may + # not be there. + self.convert_check(before, after, run=False) + + @unittest.skip('Not working yet ...') + def test_urllib_refactor(self): + # Code like this using urllib is refactored by futurize --stage2 to use + # the new Py3 module names, but ``future`` doesn't support urllib yet. + before = """ + import urllib + + URL = 'http://pypi.python.org/pypi/future/json' + package = 'future' + r = urllib.urlopen(URL.format(package)) + data = r.read() + """ + after = """ + from future import standard_library + standard_library.install_aliases() + import urllib.request + + URL = 'http://pypi.python.org/pypi/future/json' + package = 'future' + r = urllib.request.urlopen(URL.format(package)) + data = r.read() + """ + self.convert_check(before, after) + + @unittest.skip('Infinite loop?') + def test_renamed_copy_reg_and_cPickle_modules(self): + """ + Example from docs.python.org/2/library/copy_reg.html + """ + before = """ + import copy_reg + import copy + import cPickle + class C(object): + def __init__(self, a): + self.a = a + + def pickle_c(c): + print('pickling a C instance...') + return C, (c.a,) + + copy_reg.pickle(C, pickle_c) + c = C(1) + d = copy.copy(c) + p = cPickle.dumps(c) + """ + after = """ + import copyreg + import copy + import pickle + class C(object): + def __init__(self, a): + self.a = a + + def pickle_c(c): + print('pickling a C instance...') + return C, (c.a,) + + copyreg.pickle(C, pickle_c) + c = C(1) + d = copy.copy(c) + p = pickle.dumps(c) + """ + self.convert_check(before, after) + + @unittest.expectedFailure + def 
test_Py2_StringIO_module(self): + """ + This requires that the argument to io.StringIO be made a + unicode string explicitly if we're not using unicode_literals: + + Ideally, there would be a fixer for this. For now: + + TODO: add the Py3 equivalent for this to the docs. Also add back + a test for the unicode_literals case. + """ + before = """ + import cStringIO + import StringIO + s1 = cStringIO.StringIO('my string') + s2 = StringIO.StringIO('my other string') + assert isinstance(s1, cStringIO.InputType) + """ + + # There is no io.InputType in Python 3. futurize should change this to + # something like this. But note that the input to io.StringIO + # must be a unicode string on both Py2 and Py3. + after = """ + import io + import io + s1 = io.StringIO(u'my string') + s2 = io.StringIO(u'my other string') + assert isinstance(s1, io.StringIO) + """ + self.convert_check(before, after) + + +class TestFuturizeStage1(CodeHandler): + """ + Tests "stage 1": safe optimizations: modernizing Python 2 code so that it + uses print functions, new-style exception syntax, etc. + + The behaviour should not change and this should introduce no dependency on + the ``future`` package. It produces more modern Python 2-only code. The + goal is to reduce the size of the real porting patch-set by performing + the uncontroversial patches first. + """ + + def test_apply(self): + """ + apply() should be changed by futurize --stage1 + """ + before = ''' + def f(a, b): + return a + b + + args = (1, 2) + assert apply(f, args) == 3 + assert apply(f, ('a', 'b')) == 'ab' + ''' + after = ''' + def f(a, b): + return a + b + + args = (1, 2) + assert f(*args) == 3 + assert f(*('a', 'b')) == 'ab' + ''' + self.convert_check(before, after, stages=[1]) + + def test_next_1(self): + """ + Custom next methods should not be converted to __next__ in stage1, but + any obj.next() calls should be converted to next(obj). 
+ """ + before = """ + class Upper: + def __init__(self, iterable): + self._iter = iter(iterable) + def next(self): # note the Py2 interface + return next(self._iter).upper() + def __iter__(self): + return self + + itr = Upper('hello') + assert itr.next() == 'H' + assert next(itr) == 'E' + assert list(itr) == list('LLO') + """ + + after = """ + class Upper: + def __init__(self, iterable): + self._iter = iter(iterable) + def next(self): # note the Py2 interface + return next(self._iter).upper() + def __iter__(self): + return self + + itr = Upper('hello') + assert next(itr) == 'H' + assert next(itr) == 'E' + assert list(itr) == list('LLO') + """ + self.convert_check(before, after, stages=[1], run=PY2) + + @unittest.expectedFailure + def test_next_2(self): + """ + This version of the above doesn't currently work: the self._iter.next() call in + line 5 isn't converted to next(self._iter). + """ + before = """ + class Upper: + def __init__(self, iterable): + self._iter = iter(iterable) + def next(self): # note the Py2 interface + return self._iter.next().upper() + def __iter__(self): + return self + + itr = Upper('hello') + assert itr.next() == 'H' + assert next(itr) == 'E' + assert list(itr) == list('LLO') + """ + + after = """ + class Upper(object): + def __init__(self, iterable): + self._iter = iter(iterable) + def next(self): # note the Py2 interface + return next(self._iter).upper() + def __iter__(self): + return self + + itr = Upper('hello') + assert next(itr) == 'H' + assert next(itr) == 'E' + assert list(itr) == list('LLO') + """ + self.convert_check(before, after, stages=[1], run=PY2) + + def test_xrange(self): + """ + xrange should not be changed by futurize --stage1 + """ + code = ''' + for i in xrange(10): + pass + ''' + self.unchanged(code, stages=[1], run=PY2) + + @unittest.expectedFailure + def test_absolute_import_changes(self): + """ + Implicit relative imports should be converted to absolute or explicit + relative imports correctly. 
+ + Issue #16 (with porting bokeh/bbmodel.py) + """ + with open(self.tempdir + 'specialmodels.py', 'w') as f: + f.write('pass') + + before = """ + import specialmodels.pandasmodel + specialmodels.pandasmodel.blah() + """ + after = """ + from __future__ import absolute_import + from .specialmodels import pandasmodel + pandasmodel.blah() + """ + self.convert_check(before, after, stages=[1]) + + def test_safe_futurize_imports(self): + """ + The standard library module names should not be changed until stage 2 + """ + before = """ + import ConfigParser + import HTMLParser + from itertools import ifilterfalse + + ConfigParser.ConfigParser + HTMLParser.HTMLParser + assert list(ifilterfalse(lambda x: x % 2, [2, 4])) == [2, 4] + """ + self.unchanged(before, stages=[1], run=PY2) + + def test_print(self): + before = """ + print 'Hello' + """ + after = """ + print('Hello') + """ + self.convert_check(before, after, stages=[1]) + + before = """ + import sys + print >> sys.stderr, 'Hello', 'world' + """ + after = """ + import sys + print('Hello', 'world', file=sys.stderr) + """ + self.convert_check(before, after, stages=[1]) + + def test_print_already_function(self): + """ + Running futurize --stage1 should not add a second set of parentheses + """ + before = """ + print('Hello') + """ + self.unchanged(before, stages=[1]) + + @unittest.expectedFailure + def test_print_already_function_complex(self): + """ + Running futurize --stage1 does add a second second set of parentheses + in this case. This is because the underlying lib2to3 has two distinct + grammars -- with a print statement and with a print function -- and, + when going forwards (2 to both), futurize assumes print is a statement, + which raises a ParseError. 
+ """ + before = """ + import sys + print('Hello', 'world', file=sys.stderr) + """ + self.unchanged(before, stages=[1]) + + def test_exceptions(self): + before = """ + try: + raise AttributeError('blah') + except AttributeError, e: + pass + """ + after = """ + try: + raise AttributeError('blah') + except AttributeError as e: + pass + """ + self.convert_check(before, after, stages=[1]) + + @unittest.expectedFailure + def test_string_exceptions(self): + """ + 2to3 does not convert string exceptions: see + http://python3porting.com/differences.html. + """ + before = """ + try: + raise "old string exception" + except Exception, e: + pass + """ + after = """ + try: + raise Exception("old string exception") + except Exception as e: + pass + """ + self.convert_check(before, after, stages=[1]) + + def test_oldstyle_classes(self): + """ + We don't convert old-style classes to new-style automatically in + stage 1 (but we should in stage 2). So Blah should not inherit + explicitly from object yet. 
+ """ + before = """ + class Blah: + pass + """ + self.unchanged(before, stages=[1]) + + def test_stdlib_modules_not_changed(self): + """ + Standard library module names should not be changed in stage 1 + """ + before = """ + import ConfigParser + import HTMLParser + import collections + + print 'Hello' + try: + raise AttributeError('blah') + except AttributeError, e: + pass + """ + after = """ + import ConfigParser + import HTMLParser + import collections + + print('Hello') + try: + raise AttributeError('blah') + except AttributeError as e: + pass + """ + self.convert_check(before, after, stages=[1], run=PY2) + + def test_octal_literals(self): + before = """ + mode = 0644 + """ + after = """ + mode = 0o644 + """ + self.convert_check(before, after) + + def test_long_int_literals(self): + before = """ + bignumber = 12345678901234567890L + """ + after = """ + bignumber = 12345678901234567890 + """ + self.convert_check(before, after) + + def test___future___import_position(self): + """ + Issue #4: __future__ imports inserted too low in file: SyntaxError + """ + code = """ + # Comments here + # and here + __version__=''' $Id$ ''' + __doc__="A Sequencer class counts things. It aids numbering and formatting lists." + __all__='Sequencer getSequencer setSequencer'.split() + # + # another comment + # + + CONSTANTS = [ 0, 01, 011, 0111, 012, 02, 021, 0211, 02111, 013 ] + _RN_LETTERS = "IVXLCDM" + + def my_func(value): + pass + + ''' Docstring-like comment here ''' + """ + self.convert(code) + + def test_issue_45(self): + """ + Tests whether running futurize -f libfuturize.fixes.fix_future_standard_library_urllib + on the code below causes a ValueError (issue #45). + """ + code = r""" + from __future__ import print_function + from urllib import urlopen, urlencode + oeis_url = 'http://oeis.org/' + def _fetch(url): + try: + f = urlopen(url) + result = f.read() + f.close() + return result + except IOError as msg: + raise IOError("%s\nError fetching %s." 
% (msg, url)) + """ + self.convert(code) + + def test_order_future_lines(self): + """ + Tests the internal order_future_lines() function. + """ + before = ''' + # comment here + from __future__ import print_function + from __future__ import absolute_import + # blank line or comment here + from future.utils import with_metaclass + from builtins import zzz + from builtins import aaa + from builtins import blah + # another comment + + import something_else + code_here + more_code_here + ''' + after = ''' + # comment here + from __future__ import absolute_import + from __future__ import print_function + # blank line or comment here + from future.utils import with_metaclass + from builtins import aaa + from builtins import blah + from builtins import zzz + # another comment + + import something_else + code_here + more_code_here + ''' + self.assertEqual(order_future_lines(reformat_code(before)), + reformat_code(after)) + + @unittest.expectedFailure + def test_issue_12(self): + """ + Issue #12: This code shouldn't be upset by additional imports. + __future__ imports must appear at the top of modules since about Python + 2.5. + """ + code = """ + from __future__ import with_statement + f = open('setup.py') + for i in xrange(100): + pass + """ + self.unchanged(code) + + @expectedFailurePY26 + def test_range_necessary_list_calls(self): + """ + On Py2.6 (only), the xrange_with_import fixer somehow seems to cause + l = range(10) + to be converted to: + l = list(list(range(10))) + with an extra list(...) call. 
+ """ + before = """ + l = range(10) + assert isinstance(l, list) + for i in range(3): + print i + for i in xrange(3): + print i + """ + after = """ + from __future__ import print_function + from builtins import range + l = list(range(10)) + assert isinstance(l, list) + for i in range(3): + print(i) + for i in range(3): + print(i) + """ + self.convert_check(before, after) + + def test_basestring(self): + """ + The 2to3 basestring fixer breaks working Py2 code that uses basestring. + This tests whether something sensible is done instead. + """ + before = """ + assert isinstance('hello', basestring) + assert isinstance(u'hello', basestring) + assert isinstance(b'hello', basestring) + """ + after = """ + from past.builtins import basestring + assert isinstance('hello', basestring) + assert isinstance(u'hello', basestring) + assert isinstance(b'hello', basestring) + """ + self.convert_check(before, after) + + def test_safe_division(self): + """ + Tests whether Py2 scripts using old-style division still work + after futurization. + """ + before = """ + import random + class fraction(object): + numer = 0 + denom = 0 + def __init__(self, numer, denom): + self.numer = numer + self.denom = denom + + def total_count(self): + return self.numer * 50 + + x = 3 / 2 + y = 3. 
/ 2 + foo = list(range(100)) + assert x == 1 and isinstance(x, int) + assert y == 1.5 and isinstance(y, float) + a = 1 + foo[len(foo) / 2] + b = 1 + foo[len(foo) * 3 / 4] + assert a == 51 + assert b == 76 + r = random.randint(0, 1000) * 1.0 / 1000 + output = { "SUCCESS": 5, "TOTAL": 10 } + output["SUCCESS"] * 100 / output["TOTAL"] + obj = fraction(1, 50) + val = float(obj.numer) / obj.denom * 1e-9 + obj.numer * obj.denom / val + obj.total_count() * val / 100 + obj.numer / obj.denom * 1e-9 + obj.numer / (obj.denom * 1e-9) + obj.numer / obj.denom / 1e-9 + obj.numer / (obj.denom / 1e-9) + original_numer = 1 + original_denom = 50 + 100 * abs(obj.numer - original_numer) / float(max(obj.denom, original_denom)) + 100 * abs(obj.numer - original_numer) / max(obj.denom, original_denom) + float(original_numer) * float(original_denom) / float(obj.numer) + """ + after = """ + from __future__ import division + from past.utils import old_div + import random + class fraction(object): + numer = 0 + denom = 0 + def __init__(self, numer, denom): + self.numer = numer + self.denom = denom + + def total_count(self): + return self.numer * 50 + + x = old_div(3, 2) + y = 3. 
/ 2 + foo = list(range(100)) + assert x == 1 and isinstance(x, int) + assert y == 1.5 and isinstance(y, float) + a = 1 + foo[old_div(len(foo), 2)] + b = 1 + foo[old_div(len(foo) * 3, 4)] + assert a == 51 + assert b == 76 + r = random.randint(0, 1000) * 1.0 / 1000 + output = { "SUCCESS": 5, "TOTAL": 10 } + old_div(output["SUCCESS"] * 100, output["TOTAL"]) + obj = fraction(1, 50) + val = float(obj.numer) / obj.denom * 1e-9 + old_div(obj.numer * obj.denom, val) + old_div(obj.total_count() * val, 100) + old_div(obj.numer, obj.denom) * 1e-9 + old_div(obj.numer, (obj.denom * 1e-9)) + old_div(old_div(obj.numer, obj.denom), 1e-9) + old_div(obj.numer, (old_div(obj.denom, 1e-9))) + original_numer = 1 + original_denom = 50 + 100 * abs(obj.numer - original_numer) / float(max(obj.denom, original_denom)) + old_div(100 * abs(obj.numer - original_numer), max(obj.denom, original_denom)) + float(original_numer) * float(original_denom) / float(obj.numer) + """ + self.convert_check(before, after) + + def test_safe_division_overloaded(self): + """ + If division is overloaded, futurize may produce spurious old_div + calls. This test is for whether the code still works on Py2 + despite these calls. 
+ """ + before = """ + class Path(str): + def __div__(self, other): + return self.__truediv__(other) + def __truediv__(self, other): + return Path(str(self) + '/' + str(other)) + path1 = Path('home') + path2 = Path('user') + z = path1 / path2 + assert isinstance(z, Path) + assert str(z) == 'home/user' + """ + after = """ + from __future__ import division + from past.utils import old_div + class Path(str): + def __div__(self, other): + return self.__truediv__(other) + def __truediv__(self, other): + return Path(str(self) + '/' + str(other)) + path1 = Path('home') + path2 = Path('user') + z = old_div(path1, path2) + assert isinstance(z, Path) + assert str(z) == 'home/user' + """ + self.convert_check(before, after) + + def test_basestring_issue_156(self): + before = """ + x = str(3) + allowed_types = basestring, int + assert isinstance('', allowed_types) + assert isinstance(u'', allowed_types) + assert isinstance(u'foo', basestring) + """ + after = """ + from builtins import str + from past.builtins import basestring + x = str(3) + allowed_types = basestring, int + assert isinstance('', allowed_types) + assert isinstance(u'', allowed_types) + assert isinstance(u'foo', basestring) + """ + self.convert_check(before, after) + + +class TestConservativeFuturize(CodeHandler): + @unittest.expectedFailure + def test_basestring(self): + """ + In conservative mode, futurize would not modify "basestring" + but merely import it from ``past``, and the following code would still + run on both Py2 and Py3. 
+ """ + before = """ + assert isinstance('hello', basestring) + assert isinstance(u'hello', basestring) + assert isinstance(b'hello', basestring) + """ + after = """ + from past.builtins import basestring + assert isinstance('hello', basestring) + assert isinstance(u'hello', basestring) + assert isinstance(b'hello', basestring) + """ + self.convert_check(before, after, conservative=True) + + @unittest.expectedFailure + def test_open(self): + """ + In conservative mode, futurize would not import io.open because + this changes the default return type from bytes to text. + """ + before = """ + filename = 'temp_file_open.test' + contents = 'Temporary file contents. Delete me.' + with open(filename, 'w') as f: + f.write(contents) + + with open(filename, 'r') as f: + data = f.read() + assert isinstance(data, str) + assert data == contents + """ + after = """ + from past.builtins import open, str as oldbytes, unicode + filename = oldbytes(b'temp_file_open.test') + contents = oldbytes(b'Temporary file contents. Delete me.') + with open(filename, oldbytes(b'w')) as f: + f.write(contents) + + with open(filename, oldbytes(b'r')) as f: + data = f.read() + assert isinstance(data, oldbytes) + assert data == contents + assert isinstance(oldbytes(b'hello'), basestring) + assert isinstance(unicode(u'hello'), basestring) + assert isinstance(oldbytes(b'hello'), basestring) + """ + self.convert_check(before, after, conservative=True) + + +class TestFuturizeAllImports(CodeHandler): + """ + Tests "futurize --all-imports". 
+ """ + @expectedFailurePY26 + def test_all_imports(self): + before = """ + import math + import os + l = range(10) + assert isinstance(l, list) + print 'Hello' + for i in xrange(100): + pass + print('Hello') + """ + after = """ + from __future__ import absolute_import + from __future__ import division + from __future__ import print_function + from __future__ import unicode_literals + from future import standard_library + standard_library.install_aliases() + from builtins import * + from builtins import range + import math + import os + l = list(range(10)) + assert isinstance(l, list) + print('Hello') + for i in range(100): + pass + print('Hello') + """ + self.convert_check(before, after, all_imports=True, ignore_imports=False) + + +if __name__ == '__main__': + unittest.main() diff --git a/future/tests/test_html.py b/tests/test_future/test_html.py similarity index 85% rename from future/tests/test_html.py rename to tests/test_future/test_html.py index c2656131..251a530f 100644 --- a/future/tests/test_html.py +++ b/tests/test_future/test_html.py @@ -7,8 +7,10 @@ from __future__ import unicode_literals from future import standard_library -import html -import unittest +with standard_library.hooks(): + import html + +from future.tests.base import unittest class HtmlTests(unittest.TestCase): @@ -22,4 +24,4 @@ def test_escape(self): if __name__ == '__main__': - unittest_main() + unittest.main() diff --git a/future/tests/test_htmlparser.py b/tests/test_future/test_htmlparser.py similarity index 98% rename from future/tests/test_htmlparser.py rename to tests/test_future/test_htmlparser.py index d4576580..7a745acf 100644 --- a/future/tests/test_htmlparser.py +++ b/tests/test_future/test_htmlparser.py @@ -7,24 +7,25 @@ """ from __future__ import (absolute_import, print_function, unicode_literals) -from future import standard_library +from future import standard_library, utils from future.builtins import * -from test import support +from future.backports.test import support 
+import future.backports.html.parser as html_parser + import pprint -import unittest +from future.tests.base import unittest import sys -import html.parser -print(html.parser.__doc__, file=sys.stderr) +# print(html_parser.__doc__, file=sys.stderr) -class EventCollector(html.parser.HTMLParser): +class EventCollector(html_parser.HTMLParser): def __init__(self, *args, **kw): self.events = [] self.append = self.events.append - html.parser.HTMLParser.__init__(self, *args, **kw) + html_parser.HTMLParser.__init__(self, *args, **kw) def get_events(self): # Normalize the list of events so that buffer artefacts don't @@ -109,7 +110,7 @@ def parse(source=source): parser = self.get_collector() parser.feed(source) parser.close() - self.assertRaises(html.parser.HTMLParseError, parse) + self.assertRaises(html_parser.HTMLParseError, parse) class HTMLParserStrictTestCase(TestCaseBase): @@ -375,6 +376,7 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase): def get_collector(self): return EventCollector(strict=False) + @unittest.skipIf(utils.PY3, 'not working on Py3.3.4 for some reason ...') def test_tolerant_parsing(self): self._run_check('te>>xt&a<\n' '", []) self._run_check("", [('comment', '$')]) diff --git a/tests/test_future/test_http_cookiejar.py b/tests/test_future/test_http_cookiejar.py new file mode 100644 index 00000000..8a98ed68 --- /dev/null +++ b/tests/test_future/test_http_cookiejar.py @@ -0,0 +1,1755 @@ +"""Tests for http/cookiejar.py.""" +from __future__ import unicode_literals +from __future__ import print_function +from __future__ import division +from __future__ import absolute_import +from future.builtins import range +from future.builtins import open + +import os +import re +import time +from future.tests.base import unittest, skip26, expectedFailurePY26 +import future.backports.test.support as test_support +import future.backports.urllib.request as urllib_request + +from future.backports.http.cookiejar import (time2isoz, http2time, + iso2time, 
time2netscape, + parse_ns_headers, join_header_words, split_header_words, Cookie, + CookieJar, DefaultCookiePolicy, LWPCookieJar, MozillaCookieJar, + LoadError, lwp_cookie_str, DEFAULT_HTTP_PORT, escape_path, + reach, is_HDN, domain_match, user_domain_match, request_path, + request_port, request_host) + + +class DateTimeTests(unittest.TestCase): + + def test_time2isoz(self): + base = 1019227000 + day = 24*3600 + self.assertEqual(time2isoz(base), "2002-04-19 14:36:40Z") + self.assertEqual(time2isoz(base+day), "2002-04-20 14:36:40Z") + self.assertEqual(time2isoz(base+2*day), "2002-04-21 14:36:40Z") + self.assertEqual(time2isoz(base+3*day), "2002-04-22 14:36:40Z") + + az = time2isoz() + bz = time2isoz(500000) + for text in (az, bz): + self.assertRegex(text, r"^\d{4}-\d\d-\d\d \d\d:\d\d:\d\dZ$", + "bad time2isoz format: %s %s" % (az, bz)) + + def test_http2time(self): + def parse_date(text): + return time.gmtime(http2time(text))[:6] + + self.assertEqual(parse_date("01 Jan 2001"), (2001, 1, 1, 0, 0, 0.0)) + + # this test will break around year 2070 + self.assertEqual(parse_date("03-Feb-20"), (2020, 2, 3, 0, 0, 0.0)) + + # this test will break around year 2048 + self.assertEqual(parse_date("03-Feb-98"), (1998, 2, 3, 0, 0, 0.0)) + + def test_http2time_formats(self): + # test http2time for supported dates. Test cases with 2 digit year + # will probably break in year 2044. 
+ tests = [ + 'Thu, 03 Feb 1994 00:00:00 GMT', # proposed new HTTP format + 'Thursday, 03-Feb-94 00:00:00 GMT', # old rfc850 HTTP format + 'Thursday, 03-Feb-1994 00:00:00 GMT', # broken rfc850 HTTP format + + '03 Feb 1994 00:00:00 GMT', # HTTP format (no weekday) + '03-Feb-94 00:00:00 GMT', # old rfc850 (no weekday) + '03-Feb-1994 00:00:00 GMT', # broken rfc850 (no weekday) + '03-Feb-1994 00:00 GMT', # broken rfc850 (no weekday, no seconds) + '03-Feb-1994 00:00', # broken rfc850 (no weekday, no seconds, no tz) + '02-Feb-1994 24:00', # broken rfc850 (no weekday, no seconds, + # no tz) using hour 24 with yesterday date + + '03-Feb-94', # old rfc850 HTTP format (no weekday, no time) + '03-Feb-1994', # broken rfc850 HTTP format (no weekday, no time) + '03 Feb 1994', # proposed new HTTP format (no weekday, no time) + + # A few tests with extra space at various places + ' 03 Feb 1994 0:00 ', + ' 03-Feb-1994 ', + ] + + test_t = 760233600 # assume broken POSIX counting of seconds + result = time2isoz(test_t) + expected = "1994-02-03 00:00:00Z" + self.assertEqual(result, expected, + "%s => '%s' (%s)" % (test_t, result, expected)) + + for s in tests: + self.assertEqual(http2time(s), test_t, s) + self.assertEqual(http2time(s.lower()), test_t, s.lower()) + self.assertEqual(http2time(s.upper()), test_t, s.upper()) + + def test_http2time_garbage(self): + for test in [ + '', + 'Garbage', + 'Mandag 16. 
September 1996', + '01-00-1980', + '01-13-1980', + '00-01-1980', + '32-01-1980', + '01-01-1980 25:00:00', + '01-01-1980 00:61:00', + '01-01-1980 00:00:62', + ]: + self.assertIsNone(http2time(test), + "http2time(%s) is not None\n" + "http2time(test) %s" % (test, http2time(test))) + + def test_iso2time(self): + def parse_date(text): + return time.gmtime(iso2time(text))[:6] + + # ISO 8601 compact format + self.assertEqual(parse_date("19940203T141529Z"), + (1994, 2, 3, 14, 15, 29)) + + # ISO 8601 with time behind UTC + self.assertEqual(parse_date("1994-02-03 07:15:29 -0700"), + (1994, 2, 3, 14, 15, 29)) + + # ISO 8601 with time ahead of UTC + self.assertEqual(parse_date("1994-02-03 19:45:29 +0530"), + (1994, 2, 3, 14, 15, 29)) + + def test_iso2time_formats(self): + # test iso2time for supported dates. + tests = [ + '1994-02-03 00:00:00 -0000', # ISO 8601 format + '1994-02-03 00:00:00 +0000', # ISO 8601 format + '1994-02-03 00:00:00', # zone is optional + '1994-02-03', # only date + '1994-02-03T00:00:00', # Use T as separator + '19940203', # only date + '1994-02-02 24:00:00', # using hour-24 yesterday date + '19940203T000000Z', # ISO 8601 compact format + + # A few tests with extra space at various places + ' 1994-02-03 ', + ' 1994-02-03T00:00:00 ', + ] + + test_t = 760233600 # assume broken POSIX counting of seconds + for s in tests: + self.assertEqual(iso2time(s), test_t, s) + self.assertEqual(iso2time(s.lower()), test_t, s.lower()) + self.assertEqual(iso2time(s.upper()), test_t, s.upper()) + + def test_iso2time_garbage(self): + for test in [ + '', + 'Garbage', + 'Thursday, 03-Feb-94 00:00:00 GMT', + '1980-00-01', + '1980-13-01', + '1980-01-00', + '1980-01-32', + '1980-01-01 25:00:00', + '1980-01-01 00:61:00', + '01-01-1980 00:00:62', + '01-01-1980T00:00:62', + '19800101T250000Z' + '1980-01-01 00:00:00 -2500', + ]: + self.assertIsNone(iso2time(test), + "iso2time(%s) is not None\n" + "iso2time(test) %s" % (test, iso2time(test))) + + +class 
HeaderTests(unittest.TestCase): + + def test_parse_ns_headers(self): + # quotes should be stripped + expected = [[('foo', 'bar'), ('expires', 2209069412), ('version', '0')]] + for hdr in [ + 'foo=bar; expires=01 Jan 2040 22:23:32 GMT', + 'foo=bar; expires="01 Jan 2040 22:23:32 GMT"', + ]: + self.assertEqual(parse_ns_headers([hdr]), expected) + + def test_parse_ns_headers_version(self): + + # quotes should be stripped + expected = [[('foo', 'bar'), ('version', '1')]] + for hdr in [ + 'foo=bar; version="1"', + 'foo=bar; Version="1"', + ]: + self.assertEqual(parse_ns_headers([hdr]), expected) + + def test_parse_ns_headers_special_names(self): + # names such as 'expires' are not special in first name=value pair + # of Set-Cookie: header + # Cookie with name 'expires' + hdr = 'expires=01 Jan 2040 22:23:32 GMT' + expected = [[("expires", "01 Jan 2040 22:23:32 GMT"), ("version", "0")]] + self.assertEqual(parse_ns_headers([hdr]), expected) + + def test_join_header_words(self): + joined = join_header_words([[("foo", None), ("bar", "baz")]]) + self.assertEqual(joined, "foo; bar=baz") + + self.assertEqual(join_header_words([[]]), "") + + def test_split_header_words(self): + tests = [ + ("foo", [[("foo", None)]]), + ("foo=bar", [[("foo", "bar")]]), + (" foo ", [[("foo", None)]]), + (" foo= ", [[("foo", "")]]), + (" foo=", [[("foo", "")]]), + (" foo= ; ", [[("foo", "")]]), + (" foo= ; bar= baz ", [[("foo", ""), ("bar", "baz")]]), + ("foo=bar bar=baz", [[("foo", "bar"), ("bar", "baz")]]), + # doesn't really matter if this next fails, but it works ATM + ("foo= bar=baz", [[("foo", "bar=baz")]]), + ("foo=bar;bar=baz", [[("foo", "bar"), ("bar", "baz")]]), + ('foo bar baz', [[("foo", None), ("bar", None), ("baz", None)]]), + ("a, b, c", [[("a", None)], [("b", None)], [("c", None)]]), + (r'foo; bar=baz, spam=, foo="\,\;\"", bar= ', + [[("foo", None), ("bar", "baz")], + [("spam", "")], [("foo", ',;"')], [("bar", "")]]), + ] + + for arg, expect in tests: + try: + result = 
split_header_words([arg]) + except: + import traceback, io + f = io.StringIO() + traceback.print_exc(None, f) + result = "(error -- traceback follows)\n\n%s" % f.getvalue() + self.assertEqual(result, expect, """ +When parsing: '%s' +Expected: '%s' +Got: '%s' +""" % (arg, expect, result)) + + def test_roundtrip(self): + tests = [ + ("foo", "foo"), + ("foo=bar", "foo=bar"), + (" foo ", "foo"), + ("foo=", 'foo=""'), + ("foo=bar bar=baz", "foo=bar; bar=baz"), + ("foo=bar;bar=baz", "foo=bar; bar=baz"), + ('foo bar baz', "foo; bar; baz"), + (r'foo="\"" bar="\\"', r'foo="\""; bar="\\"'), + ('foo,,,bar', 'foo, bar'), + ('foo=bar,bar=baz', 'foo=bar, bar=baz'), + + ('text/html; charset=iso-8859-1', + 'text/html; charset="iso-8859-1"'), + + ('foo="bar"; port="80,81"; discard, bar=baz', + 'foo=bar; port="80,81"; discard, bar=baz'), + + (r'Basic realm="\"foo\\\\bar\""', + r'Basic; realm="\"foo\\\\bar\""') + ] + + for arg, expect in tests: + input = split_header_words([arg]) + res = join_header_words(input) + self.assertEqual(res, expect, """ +When parsing: '%s' +Expected: '%s' +Got: '%s' +Input was: '%s' +""" % (arg, expect, res, input)) + + +class FakeResponse(object): + def __init__(self, headers=[], url=None): + """ + headers: list of RFC822-style 'Key: value' strings + """ + import email + # The email.message_from_string is available on both Py2.7 and Py3.3 + self._headers = email.message_from_string("\n".join(headers)) + self._url = url + def info(self): return self._headers + +def interact_2965(cookiejar, url, *set_cookie_hdrs): + return _interact(cookiejar, url, set_cookie_hdrs, "Set-Cookie2") + +def interact_netscape(cookiejar, url, *set_cookie_hdrs): + return _interact(cookiejar, url, set_cookie_hdrs, "Set-Cookie") + +def _interact(cookiejar, url, set_cookie_hdrs, hdr_name): + """Perform a single request / response cycle, returning Cookie: header.""" + req = urllib_request.Request(url) + cookiejar.add_cookie_header(req) + cookie_hdr = req.get_header("Cookie", "") + 
headers = [] + for hdr in set_cookie_hdrs: + headers.append("%s: %s" % (hdr_name, hdr)) + res = FakeResponse(headers, url) + cookiejar.extract_cookies(res, req) + return cookie_hdr + + +class FileCookieJarTests(unittest.TestCase): + @skip26 + def test_lwp_valueless_cookie(self): + # cookies with no value should be saved and loaded consistently + filename = test_support.TESTFN + c = LWPCookieJar() + interact_netscape(c, "http://www.acme.com/", 'boo') + self.assertEqual(c._cookies["www.acme.com"]["/"]["boo"].value, None) + try: + c.save(filename, ignore_discard=True) + c = LWPCookieJar() + c.load(filename, ignore_discard=True) + finally: + try: os.unlink(filename) + except OSError: pass + self.assertEqual(c._cookies["www.acme.com"]["/"]["boo"].value, None) + + def test_bad_magic(self): + # OSErrors (eg. file doesn't exist) are allowed to propagate + filename = test_support.TESTFN + for cookiejar_class in LWPCookieJar, MozillaCookieJar: + c = cookiejar_class() + try: + c.load(filename="for this test to work, a file with this " + "filename should not exist") + # Py2.7 raises IOError, which is an alias of OSError only on Py3: + except (OSError, IOError) as exc: + # an OSError subclass (likely FileNotFoundError), but not + # LoadError + self.assertIsNot(exc.__class__, LoadError) + else: + self.fail("expected OSError for invalid filename") + # Invalid contents of cookies file (eg. bad magic string) + # causes a LoadError. + try: + with open(filename, "w") as f: + f.write("oops\n") + for cookiejar_class in LWPCookieJar, MozillaCookieJar: + c = cookiejar_class() + self.assertRaises(LoadError, c.load, filename) + finally: + try: os.unlink(filename) + except OSError: pass + +class CookieTests(unittest.TestCase): + # XXX + # Get rid of string comparisons where not actually testing str / repr. + # .clear() etc. + # IP addresses like 50 (single number, no dot) and domain-matching + # functions (and is_HDN)? See draft RFC 2965 errata. 
+ # Strictness switches + # is_third_party() + # unverifiability / third-party blocking + # Netscape cookies work the same as RFC 2965 with regard to port. + # Set-Cookie with negative max age. + # If turn RFC 2965 handling off, Set-Cookie2 cookies should not clobber + # Set-Cookie cookies. + # Cookie2 should be sent if *any* cookies are not V1 (ie. V0 OR V2 etc.). + # Cookies (V1 and V0) with no expiry date should be set to be discarded. + # RFC 2965 Quoting: + # Should accept unquoted cookie-attribute values? check errata draft. + # Which are required on the way in and out? + # Should always return quoted cookie-attribute values? + # Proper testing of when RFC 2965 clobbers Netscape (waiting for errata). + # Path-match on return (same for V0 and V1). + # RFC 2965 acceptance and returning rules + # Set-Cookie2 without version attribute is rejected. + + # Netscape peculiarities list from Ronald Tschalar. + # The first two still need tests, the rest are covered. +## - Quoting: only quotes around the expires value are recognized as such +## (and yes, some folks quote the expires value); quotes around any other +## value are treated as part of the value. +## - White space: white space around names and values is ignored +## - Default path: if no path parameter is given, the path defaults to the +## path in the request-uri up to, but not including, the last '/'. Note +## that this is entirely different from what the spec says. +## - Commas and other delimiters: Netscape just parses until the next ';'. +## This means it will allow commas etc inside values (and yes, both +## commas and equals are commonly appear in the cookie value). This also +## means that if you fold multiple Set-Cookie header fields into one, +## comma-separated list, it'll be a headache to parse (at least my head +## starts hurting every time I think of that code). +## - Expires: You'll get all sorts of date formats in the expires, +## including empty expires attributes ("expires="). 
Be as flexible as you +## can, and certainly don't expect the weekday to be there; if you can't +## parse it, just ignore it and pretend it's a session cookie. +## - Domain-matching: Netscape uses the 2-dot rule for _all_ domains, not +## just the 7 special TLD's listed in their spec. And folks rely on +## that... + + def test_domain_return_ok(self): + # test optimization: .domain_return_ok() should filter out most + # domains in the CookieJar before we try to access them (because that + # may require disk access -- in particular, with MSIECookieJar) + # This is only a rough check for performance reasons, so it's not too + # critical as long as it's sufficiently liberal. + pol = DefaultCookiePolicy() + for url, domain, ok in [ + ("http://foo.bar.com/", "blah.com", False), + ("http://foo.bar.com/", "rhubarb.blah.com", False), + ("http://foo.bar.com/", "rhubarb.foo.bar.com", False), + ("http://foo.bar.com/", ".foo.bar.com", True), + ("http://foo.bar.com/", "foo.bar.com", True), + ("http://foo.bar.com/", ".bar.com", True), + ("http://foo.bar.com/", "com", True), + ("http://foo.com/", "rhubarb.foo.com", False), + ("http://foo.com/", ".foo.com", True), + ("http://foo.com/", "foo.com", True), + ("http://foo.com/", "com", True), + ("http://foo/", "rhubarb.foo", False), + ("http://foo/", ".foo", True), + ("http://foo/", "foo", True), + ("http://foo/", "foo.local", True), + ("http://foo/", ".local", True), + ]: + request = urllib_request.Request(url) + r = pol.domain_return_ok(domain, request) + if ok: self.assertTrue(r) + else: self.assertFalse(r) + + @skip26 + def test_missing_value(self): + # missing = sign in Cookie: header is regarded by Mozilla as a missing + # name, and by http.cookiejar as a missing value + filename = test_support.TESTFN + c = MozillaCookieJar(filename) + interact_netscape(c, "http://www.acme.com/", 'eggs') + interact_netscape(c, "http://www.acme.com/", '"spam"; path=/foo/') + cookie = c._cookies["www.acme.com"]["/"]["eggs"] + 
self.assertIsNone(cookie.value) + self.assertEqual(cookie.name, "eggs") + cookie = c._cookies["www.acme.com"]['/foo/']['"spam"'] + self.assertIsNone(cookie.value) + self.assertEqual(cookie.name, '"spam"') + self.assertEqual(lwp_cookie_str(cookie), ( + r'"spam"; path="/foo/"; domain="www.acme.com"; ' + 'path_spec; discard; version=0')) + old_str = repr(c) + c.save(ignore_expires=True, ignore_discard=True) + try: + c = MozillaCookieJar(filename) + c.revert(ignore_expires=True, ignore_discard=True) + finally: + os.unlink(c.filename) + # cookies unchanged apart from lost info re. whether path was specified + self.assertEqual( + repr(c), + re.sub("path_specified=%s" % True, "path_specified=%s" % False, + old_str) + ) + self.assertEqual(interact_netscape(c, "http://www.acme.com/foo/"), + '"spam"; eggs') + + @expectedFailurePY26 + def test_rfc2109_handling(self): + # RFC 2109 cookies are handled as RFC 2965 or Netscape cookies, + # dependent on policy settings + for rfc2109_as_netscape, rfc2965, version in [ + # default according to rfc2965 if not explicitly specified + (None, False, 0), + (None, True, 1), + # explicit rfc2109_as_netscape + (False, False, None), # version None here means no cookie stored + (False, True, 1), + (True, False, 0), + (True, True, 0), + ]: + policy = DefaultCookiePolicy( + rfc2109_as_netscape=rfc2109_as_netscape, + rfc2965=rfc2965) + c = CookieJar(policy) + interact_netscape(c, "http://www.example.com/", "ni=ni; Version=1") + try: + cookie = c._cookies["www.example.com"]["/"]["ni"] + except KeyError: + self.assertIsNone(version) # didn't expect a stored cookie + else: + self.assertEqual(cookie.version, version) + # 2965 cookies are unaffected + interact_2965(c, "http://www.example.com/", + "foo=bar; Version=1") + if rfc2965: + cookie2965 = c._cookies["www.example.com"]["/"]["foo"] + self.assertEqual(cookie2965.version, 1) + + @skip26 + def test_ns_parser(self): + c = CookieJar() + interact_netscape(c, "http://www.acme.com/", + 'spam=eggs; 
DoMain=.acme.com; port; blArgh="feep"') + interact_netscape(c, "http://www.acme.com/", 'ni=ni; port=80,8080') + interact_netscape(c, "http://www.acme.com:80/", 'nini=ni') + interact_netscape(c, "http://www.acme.com:80/", 'foo=bar; expires=') + interact_netscape(c, "http://www.acme.com:80/", 'spam=eggs; ' + 'expires="Foo Bar 25 33:22:11 3022"') + + cookie = c._cookies[".acme.com"]["/"]["spam"] + self.assertEqual(cookie.domain, ".acme.com") + self.assertTrue(cookie.domain_specified) + self.assertEqual(cookie.port, DEFAULT_HTTP_PORT) + self.assertFalse(cookie.port_specified) + # case is preserved + self.assertTrue(cookie.has_nonstandard_attr("blArgh")) + self.assertFalse(cookie.has_nonstandard_attr("blargh")) + + cookie = c._cookies["www.acme.com"]["/"]["ni"] + self.assertEqual(cookie.domain, "www.acme.com") + self.assertFalse(cookie.domain_specified) + self.assertEqual(cookie.port, "80,8080") + self.assertTrue(cookie.port_specified) + + cookie = c._cookies["www.acme.com"]["/"]["nini"] + self.assertIsNone(cookie.port) + self.assertFalse(cookie.port_specified) + + # invalid expires should not cause cookie to be dropped + foo = c._cookies["www.acme.com"]["/"]["foo"] + spam = c._cookies["www.acme.com"]["/"]["foo"] + self.assertIsNone(foo.expires) + self.assertIsNone(spam.expires) + + @skip26 + def test_ns_parser_special_names(self): + # names such as 'expires' are not special in first name=value pair + # of Set-Cookie: header + c = CookieJar() + interact_netscape(c, "http://www.acme.com/", 'expires=eggs') + interact_netscape(c, "http://www.acme.com/", 'version=eggs; spam=eggs') + + cookies = c._cookies["www.acme.com"]["/"] + self.assertIn('expires', cookies) + self.assertIn('version', cookies) + + @expectedFailurePY26 + def test_expires(self): + # if expires is in future, keep cookie... 
+ c = CookieJar() + future = time2netscape(time.time()+3600) + interact_netscape(c, "http://www.acme.com/", 'spam="bar"; expires=%s' % + future) + self.assertEqual(len(c), 1) + now = time2netscape(time.time()-1) + # ... and if in past or present, discard it + interact_netscape(c, "http://www.acme.com/", 'foo="eggs"; expires=%s' % + now) + h = interact_netscape(c, "http://www.acme.com/") + self.assertEqual(len(c), 1) + self.assertIn('spam="bar"', h) + self.assertNotIn("foo", h) + + # max-age takes precedence over expires, and zero max-age is request to + # delete both new cookie and any old matching cookie + interact_netscape(c, "http://www.acme.com/", 'eggs="bar"; expires=%s' % + future) + interact_netscape(c, "http://www.acme.com/", 'bar="bar"; expires=%s' % + future) + self.assertEqual(len(c), 3) + interact_netscape(c, "http://www.acme.com/", 'eggs="bar"; ' + 'expires=%s; max-age=0' % future) + interact_netscape(c, "http://www.acme.com/", 'bar="bar"; ' + 'max-age=0; expires=%s' % future) + h = interact_netscape(c, "http://www.acme.com/") + self.assertEqual(len(c), 1) + + # test expiry at end of session for cookies with no expires attribute + interact_netscape(c, "http://www.rhubarb.net/", 'whum="fizz"') + self.assertEqual(len(c), 2) + c.clear_session_cookies() + self.assertEqual(len(c), 1) + self.assertIn('spam="bar"', h) + + # XXX RFC 2965 expiry rules (some apply to V0 too) + + @skip26 + def test_default_path(self): + # RFC 2965 + pol = DefaultCookiePolicy(rfc2965=True) + + c = CookieJar(pol) + interact_2965(c, "http://www.acme.com/", 'spam="bar"; Version="1"') + self.assertIn("/", c._cookies["www.acme.com"]) + + c = CookieJar(pol) + interact_2965(c, "http://www.acme.com/blah", 'eggs="bar"; Version="1"') + self.assertIn("/", c._cookies["www.acme.com"]) + + c = CookieJar(pol) + interact_2965(c, "http://www.acme.com/blah/rhubarb", + 'eggs="bar"; Version="1"') + self.assertIn("/blah/", c._cookies["www.acme.com"]) + + c = CookieJar(pol) + interact_2965(c, 
"http://www.acme.com/blah/rhubarb/", + 'eggs="bar"; Version="1"') + self.assertIn("/blah/rhubarb/", c._cookies["www.acme.com"]) + + # Netscape + + c = CookieJar() + interact_netscape(c, "http://www.acme.com/", 'spam="bar"') + self.assertIn("/", c._cookies["www.acme.com"]) + + c = CookieJar() + interact_netscape(c, "http://www.acme.com/blah", 'eggs="bar"') + self.assertIn("/", c._cookies["www.acme.com"]) + + c = CookieJar() + interact_netscape(c, "http://www.acme.com/blah/rhubarb", 'eggs="bar"') + self.assertIn("/blah", c._cookies["www.acme.com"]) + + c = CookieJar() + interact_netscape(c, "http://www.acme.com/blah/rhubarb/", 'eggs="bar"') + self.assertIn("/blah/rhubarb", c._cookies["www.acme.com"]) + + @skip26 + def test_default_path_with_query(self): + cj = CookieJar() + uri = "http://example.com/?spam/eggs" + value = 'eggs="bar"' + interact_netscape(cj, uri, value) + # Default path does not include query, so is "/", not "/?spam". + self.assertIn("/", cj._cookies["example.com"]) + # Cookie is sent back to the same URI. 
+ self.assertEqual(interact_netscape(cj, uri), value) + + def test_escape_path(self): + cases = [ + # quoted safe + ("/foo%2f/bar", "/foo%2F/bar"), + ("/foo%2F/bar", "/foo%2F/bar"), + # quoted % + ("/foo%%/bar", "/foo%%/bar"), + # quoted unsafe + ("/fo%19o/bar", "/fo%19o/bar"), + ("/fo%7do/bar", "/fo%7Do/bar"), + # unquoted safe + ("/foo/bar&", "/foo/bar&"), + ("/foo//bar", "/foo//bar"), + ("\176/foo/bar", "\176/foo/bar"), + # unquoted unsafe + ("/foo\031/bar", "/foo%19/bar"), + ("/\175foo/bar", "/%7Dfoo/bar"), + # unicode, latin-1 range + ("/foo/bar\u00fc", "/foo/bar%C3%BC"), # UTF-8 encoded + # unicode + ("/foo/bar\uabcd", "/foo/bar%EA%AF%8D"), # UTF-8 encoded + ] + for arg, result in cases: + self.assertEqual(escape_path(arg), result) + + def test_request_path(self): + # with parameters + req = urllib_request.Request( + "http://www.example.com/rheum/rhaponticum;" + "foo=bar;sing=song?apples=pears&spam=eggs#ni") + self.assertEqual(request_path(req), + "/rheum/rhaponticum;foo=bar;sing=song") + # without parameters + req = urllib_request.Request( + "http://www.example.com/rheum/rhaponticum?" 
+ "apples=pears&spam=eggs#ni") + self.assertEqual(request_path(req), "/rheum/rhaponticum") + # missing final slash + req = urllib_request.Request("http://www.example.com") + self.assertEqual(request_path(req), "/") + + def test_request_port(self): + req = urllib_request.Request("http://www.acme.com:1234/", + headers={"Host": "www.acme.com:4321"}) + self.assertEqual(request_port(req), "1234") + req = urllib_request.Request("http://www.acme.com/", + headers={"Host": "www.acme.com:4321"}) + self.assertEqual(request_port(req), DEFAULT_HTTP_PORT) + + def test_request_host(self): + # this request is illegal (RFC2616, 14.2.3) + req = urllib_request.Request("http://1.1.1.1/", + headers={"Host": "www.acme.com:80"}) + # libwww-perl wants this response, but that seems wrong (RFC 2616, + # section 5.2, point 1., and RFC 2965 section 1, paragraph 3) + #self.assertEqual(request_host(req), "www.acme.com") + self.assertEqual(request_host(req), "1.1.1.1") + req = urllib_request.Request("http://www.acme.com/", + headers={"Host": "irrelevant.com"}) + self.assertEqual(request_host(req), "www.acme.com") + # port shouldn't be in request-host + req = urllib_request.Request("http://www.acme.com:2345/resource.html", + headers={"Host": "www.acme.com:5432"}) + self.assertEqual(request_host(req), "www.acme.com") + + def test_is_HDN(self): + self.assertTrue(is_HDN("foo.bar.com")) + self.assertTrue(is_HDN("1foo2.3bar4.5com")) + self.assertFalse(is_HDN("192.168.1.1")) + self.assertFalse(is_HDN("")) + self.assertFalse(is_HDN(".")) + self.assertFalse(is_HDN(".foo.bar.com")) + self.assertFalse(is_HDN("..foo")) + self.assertFalse(is_HDN("foo.")) + + def test_reach(self): + self.assertEqual(reach("www.acme.com"), ".acme.com") + self.assertEqual(reach("acme.com"), "acme.com") + self.assertEqual(reach("acme.local"), ".local") + self.assertEqual(reach(".local"), ".local") + self.assertEqual(reach(".com"), ".com") + self.assertEqual(reach("."), ".") + self.assertEqual(reach(""), "") + 
self.assertEqual(reach("192.168.0.1"), "192.168.0.1") + + def test_domain_match(self): + self.assertTrue(domain_match("192.168.1.1", "192.168.1.1")) + self.assertFalse(domain_match("192.168.1.1", ".168.1.1")) + self.assertTrue(domain_match("x.y.com", "x.Y.com")) + self.assertTrue(domain_match("x.y.com", ".Y.com")) + self.assertFalse(domain_match("x.y.com", "Y.com")) + self.assertTrue(domain_match("a.b.c.com", ".c.com")) + self.assertFalse(domain_match(".c.com", "a.b.c.com")) + self.assertTrue(domain_match("example.local", ".local")) + self.assertFalse(domain_match("blah.blah", "")) + self.assertFalse(domain_match("", ".rhubarb.rhubarb")) + self.assertTrue(domain_match("", "")) + + self.assertTrue(user_domain_match("acme.com", "acme.com")) + self.assertFalse(user_domain_match("acme.com", ".acme.com")) + self.assertTrue(user_domain_match("rhubarb.acme.com", ".acme.com")) + self.assertTrue(user_domain_match("www.rhubarb.acme.com", ".acme.com")) + self.assertTrue(user_domain_match("x.y.com", "x.Y.com")) + self.assertTrue(user_domain_match("x.y.com", ".Y.com")) + self.assertFalse(user_domain_match("x.y.com", "Y.com")) + self.assertTrue(user_domain_match("y.com", "Y.com")) + self.assertFalse(user_domain_match(".y.com", "Y.com")) + self.assertTrue(user_domain_match(".y.com", ".Y.com")) + self.assertTrue(user_domain_match("x.y.com", ".com")) + self.assertFalse(user_domain_match("x.y.com", "com")) + self.assertFalse(user_domain_match("x.y.com", "m")) + self.assertFalse(user_domain_match("x.y.com", ".m")) + self.assertFalse(user_domain_match("x.y.com", "")) + self.assertFalse(user_domain_match("x.y.com", ".")) + self.assertTrue(user_domain_match("192.168.1.1", "192.168.1.1")) + # not both HDNs, so must string-compare equal to match + self.assertFalse(user_domain_match("192.168.1.1", ".168.1.1")) + self.assertFalse(user_domain_match("192.168.1.1", ".")) + # empty string is a special case + self.assertFalse(user_domain_match("192.168.1.1", "")) + + def test_wrong_domain(self): 
+ # Cookies whose effective request-host name does not domain-match the + # domain are rejected. + + # XXX far from complete + c = CookieJar() + interact_2965(c, "http://www.nasty.com/", + 'foo=bar; domain=friendly.org; Version="1"') + self.assertEqual(len(c), 0) + + @expectedFailurePY26 + def test_strict_domain(self): + # Cookies whose domain is a country-code tld like .co.uk should + # not be set if CookiePolicy.strict_domain is true. + cp = DefaultCookiePolicy(strict_domain=True) + cj = CookieJar(policy=cp) + interact_netscape(cj, "http://example.co.uk/", 'no=problemo') + interact_netscape(cj, "http://example.co.uk/", + 'okey=dokey; Domain=.example.co.uk') + self.assertEqual(len(cj), 2) + for pseudo_tld in [".co.uk", ".org.za", ".tx.us", ".name.us"]: + interact_netscape(cj, "http://example.%s/" % pseudo_tld, + 'spam=eggs; Domain=.co.uk') + self.assertEqual(len(cj), 2) + + @expectedFailurePY26 + def test_two_component_domain_ns(self): + # Netscape: .www.bar.com, www.bar.com, .bar.com, bar.com, no domain + # should all get accepted, as should .acme.com, acme.com and no domain + # for 2-component domains like acme.com. + c = CookieJar() + + # two-component V0 domain is OK + interact_netscape(c, "http://foo.net/", 'ns=bar') + self.assertEqual(len(c), 1) + self.assertEqual(c._cookies["foo.net"]["/"]["ns"].value, "bar") + self.assertEqual(interact_netscape(c, "http://foo.net/"), "ns=bar") + # *will* be returned to any other domain (unlike RFC 2965)... 
+ self.assertEqual(interact_netscape(c, "http://www.foo.net/"), + "ns=bar") + # ...unless requested otherwise + pol = DefaultCookiePolicy( + strict_ns_domain=DefaultCookiePolicy.DomainStrictNonDomain) + c.set_policy(pol) + self.assertEqual(interact_netscape(c, "http://www.foo.net/"), "") + + # unlike RFC 2965, even explicit two-component domain is OK, + # because .foo.net matches foo.net + interact_netscape(c, "http://foo.net/foo/", + 'spam1=eggs; domain=foo.net') + # even if starts with a dot -- in NS rules, .foo.net matches foo.net! + interact_netscape(c, "http://foo.net/foo/bar/", + 'spam2=eggs; domain=.foo.net') + self.assertEqual(len(c), 3) + self.assertEqual(c._cookies[".foo.net"]["/foo"]["spam1"].value, + "eggs") + self.assertEqual(c._cookies[".foo.net"]["/foo/bar"]["spam2"].value, + "eggs") + self.assertEqual(interact_netscape(c, "http://foo.net/foo/bar/"), + "spam2=eggs; spam1=eggs; ns=bar") + + # top-level domain is too general + interact_netscape(c, "http://foo.net/", 'nini="ni"; domain=.net') + self.assertEqual(len(c), 3) + +## # Netscape protocol doesn't allow non-special top level domains (such +## # as co.uk) in the domain attribute unless there are at least three +## # dots in it. + # Oh yes it does! Real implementations don't check this, and real + # cookies (of course) rely on that behaviour. 
+ interact_netscape(c, "http://foo.co.uk", 'nasty=trick; domain=.co.uk') +## self.assertEqual(len(c), 2) + self.assertEqual(len(c), 4) + + @expectedFailurePY26 + def test_two_component_domain_rfc2965(self): + pol = DefaultCookiePolicy(rfc2965=True) + c = CookieJar(pol) + + # two-component V1 domain is OK + interact_2965(c, "http://foo.net/", 'foo=bar; Version="1"') + self.assertEqual(len(c), 1) + self.assertEqual(c._cookies["foo.net"]["/"]["foo"].value, "bar") + self.assertEqual(interact_2965(c, "http://foo.net/"), + "$Version=1; foo=bar") + # won't be returned to any other domain (because domain was implied) + self.assertEqual(interact_2965(c, "http://www.foo.net/"), "") + + # unless domain is given explicitly, because then it must be + # rewritten to start with a dot: foo.net --> .foo.net, which does + # not domain-match foo.net + interact_2965(c, "http://foo.net/foo", + 'spam=eggs; domain=foo.net; path=/foo; Version="1"') + self.assertEqual(len(c), 1) + self.assertEqual(interact_2965(c, "http://foo.net/foo"), + "$Version=1; foo=bar") + + # explicit foo.net from three-component domain www.foo.net *does* get + # set, because .foo.net domain-matches .foo.net + interact_2965(c, "http://www.foo.net/foo/", + 'spam=eggs; domain=foo.net; Version="1"') + self.assertEqual(c._cookies[".foo.net"]["/foo/"]["spam"].value, + "eggs") + self.assertEqual(len(c), 2) + self.assertEqual(interact_2965(c, "http://foo.net/foo/"), + "$Version=1; foo=bar") + self.assertEqual(interact_2965(c, "http://www.foo.net/foo/"), + '$Version=1; spam=eggs; $Domain="foo.net"') + + # top-level domain is too general + interact_2965(c, "http://foo.net/", + 'ni="ni"; domain=".net"; Version="1"') + self.assertEqual(len(c), 2) + + # RFC 2965 doesn't require blocking this + interact_2965(c, "http://foo.co.uk/", + 'nasty=trick; domain=.co.uk; Version="1"') + self.assertEqual(len(c), 3) + + @expectedFailurePY26 + def test_domain_allow(self): + c = CookieJar(policy=DefaultCookiePolicy( + 
blocked_domains=["acme.com"], + allowed_domains=["www.acme.com"])) + + req = urllib_request.Request("http://acme.com/") + headers = ["Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/"] + res = FakeResponse(headers, "http://acme.com/") + c.extract_cookies(res, req) + self.assertEqual(len(c), 0) + + req = urllib_request.Request("http://www.acme.com/") + res = FakeResponse(headers, "http://www.acme.com/") + c.extract_cookies(res, req) + self.assertEqual(len(c), 1) + + req = urllib_request.Request("http://www.coyote.com/") + res = FakeResponse(headers, "http://www.coyote.com/") + c.extract_cookies(res, req) + self.assertEqual(len(c), 1) + + # set a cookie with non-allowed domain... + req = urllib_request.Request("http://www.coyote.com/") + res = FakeResponse(headers, "http://www.coyote.com/") + cookies = c.make_cookies(res, req) + c.set_cookie(cookies[0]) + self.assertEqual(len(c), 2) + # ... and check is doesn't get returned + c.add_cookie_header(req) + self.assertFalse(req.has_header("Cookie")) + + @expectedFailurePY26 + def test_domain_block(self): + pol = DefaultCookiePolicy( + rfc2965=True, blocked_domains=[".acme.com"]) + c = CookieJar(policy=pol) + headers = ["Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/"] + + req = urllib_request.Request("http://www.acme.com/") + res = FakeResponse(headers, "http://www.acme.com/") + c.extract_cookies(res, req) + self.assertEqual(len(c), 0) + + p = pol.set_blocked_domains(["acme.com"]) + c.extract_cookies(res, req) + self.assertEqual(len(c), 1) + + c.clear() + req = urllib_request.Request("http://www.roadrunner.net/") + res = FakeResponse(headers, "http://www.roadrunner.net/") + c.extract_cookies(res, req) + self.assertEqual(len(c), 1) + req = urllib_request.Request("http://www.roadrunner.net/") + c.add_cookie_header(req) + self.assertTrue(req.has_header("Cookie")) + self.assertTrue(req.has_header("Cookie2")) + + c.clear() + pol.set_blocked_domains([".acme.com"]) + c.extract_cookies(res, req) + self.assertEqual(len(c), 1) + + # set a 
cookie with blocked domain... + req = urllib_request.Request("http://www.acme.com/") + res = FakeResponse(headers, "http://www.acme.com/") + cookies = c.make_cookies(res, req) + c.set_cookie(cookies[0]) + self.assertEqual(len(c), 2) + # ... and check is doesn't get returned + c.add_cookie_header(req) + self.assertFalse(req.has_header("Cookie")) + + @skip26 + def test_secure(self): + for ns in True, False: + for whitespace in " ", "": + c = CookieJar() + if ns: + pol = DefaultCookiePolicy(rfc2965=False) + int = interact_netscape + vs = "" + else: + pol = DefaultCookiePolicy(rfc2965=True) + int = interact_2965 + vs = "; Version=1" + c.set_policy(pol) + url = "http://www.acme.com/" + int(c, url, "foo1=bar%s%s" % (vs, whitespace)) + int(c, url, "foo2=bar%s; secure%s" % (vs, whitespace)) + self.assertFalse( + c._cookies["www.acme.com"]["/"]["foo1"].secure, + "non-secure cookie registered secure") + self.assertTrue( + c._cookies["www.acme.com"]["/"]["foo2"].secure, + "secure cookie registered non-secure") + + @expectedFailurePY26 + def test_quote_cookie_value(self): + c = CookieJar(policy=DefaultCookiePolicy(rfc2965=True)) + interact_2965(c, "http://www.acme.com/", r'foo=\b"a"r; Version=1') + h = interact_2965(c, "http://www.acme.com/") + self.assertEqual(h, r'$Version=1; foo=\\b\"a\"r') + + @expectedFailurePY26 + def test_missing_final_slash(self): + # Missing slash from request URL's abs_path should be assumed present. 
+ url = "http://www.acme.com" + c = CookieJar(DefaultCookiePolicy(rfc2965=True)) + interact_2965(c, url, "foo=bar; Version=1") + req = urllib_request.Request(url) + self.assertEqual(len(c), 1) + c.add_cookie_header(req) + self.assertTrue(req.has_header("Cookie")) + + @expectedFailurePY26 + def test_domain_mirror(self): + pol = DefaultCookiePolicy(rfc2965=True) + + c = CookieJar(pol) + url = "http://foo.bar.com/" + interact_2965(c, url, "spam=eggs; Version=1") + h = interact_2965(c, url) + self.assertNotIn("Domain", h, + "absent domain returned with domain present") + + c = CookieJar(pol) + url = "http://foo.bar.com/" + interact_2965(c, url, 'spam=eggs; Version=1; Domain=.bar.com') + h = interact_2965(c, url) + self.assertIn('$Domain=".bar.com"', h, "domain not returned") + + c = CookieJar(pol) + url = "http://foo.bar.com/" + # note missing initial dot in Domain + interact_2965(c, url, 'spam=eggs; Version=1; Domain=bar.com') + h = interact_2965(c, url) + self.assertIn('$Domain="bar.com"', h, "domain not returned") + + @expectedFailurePY26 + def test_path_mirror(self): + pol = DefaultCookiePolicy(rfc2965=True) + + c = CookieJar(pol) + url = "http://foo.bar.com/" + interact_2965(c, url, "spam=eggs; Version=1") + h = interact_2965(c, url) + self.assertNotIn("Path", h, "absent path returned with path present") + + c = CookieJar(pol) + url = "http://foo.bar.com/" + interact_2965(c, url, 'spam=eggs; Version=1; Path=/') + h = interact_2965(c, url) + self.assertIn('$Path="/"', h, "path not returned") + + @expectedFailurePY26 + def test_port_mirror(self): + pol = DefaultCookiePolicy(rfc2965=True) + + c = CookieJar(pol) + url = "http://foo.bar.com/" + interact_2965(c, url, "spam=eggs; Version=1") + h = interact_2965(c, url) + self.assertNotIn("Port", h, "absent port returned with port present") + + c = CookieJar(pol) + url = "http://foo.bar.com/" + interact_2965(c, url, "spam=eggs; Version=1; Port") + h = interact_2965(c, url) + self.assertRegex(h, "\$Port([^=]|$)", + "port 
with no value not returned with no value") + + c = CookieJar(pol) + url = "http://foo.bar.com/" + interact_2965(c, url, 'spam=eggs; Version=1; Port="80"') + h = interact_2965(c, url) + self.assertIn('$Port="80"', h, + "port with single value not returned with single value") + + c = CookieJar(pol) + url = "http://foo.bar.com/" + interact_2965(c, url, 'spam=eggs; Version=1; Port="80,8080"') + h = interact_2965(c, url) + self.assertIn('$Port="80,8080"', h, + "port with multiple values not returned with multiple " + "values") + + def test_no_return_comment(self): + c = CookieJar(DefaultCookiePolicy(rfc2965=True)) + url = "http://foo.bar.com/" + interact_2965(c, url, 'spam=eggs; Version=1; ' + 'Comment="does anybody read these?"; ' + 'CommentURL="http://foo.bar.net/comment.html"') + h = interact_2965(c, url) + self.assertNotIn("Comment", h, + "Comment or CommentURL cookie-attributes returned to server") + + def test_Cookie_iterator(self): + cs = CookieJar(DefaultCookiePolicy(rfc2965=True)) + # add some random cookies + interact_2965(cs, "http://blah.spam.org/", 'foo=eggs; Version=1; ' + 'Comment="does anybody read these?"; ' + 'CommentURL="http://foo.bar.net/comment.html"') + interact_netscape(cs, "http://www.acme.com/blah/", "spam=bar; secure") + interact_2965(cs, "http://www.acme.com/blah/", + "foo=bar; secure; Version=1") + interact_2965(cs, "http://www.acme.com/blah/", + "foo=bar; path=/; Version=1") + interact_2965(cs, "http://www.sol.no", + r'bang=wallop; version=1; domain=".sol.no"; ' + r'port="90,100, 80,8080"; ' + r'max-age=100; Comment = "Just kidding! 
(\"|\\\\) "') + + versions = [1, 1, 1, 0, 1] + names = ["bang", "foo", "foo", "spam", "foo"] + domains = [".sol.no", "blah.spam.org", "www.acme.com", + "www.acme.com", "www.acme.com"] + paths = ["/", "/", "/", "/blah", "/blah/"] + + for i in range(4): + i = 0 + for c in cs: + self.assertIsInstance(c, Cookie) + self.assertEqual(c.version, versions[i]) + self.assertEqual(c.name, names[i]) + self.assertEqual(c.domain, domains[i]) + self.assertEqual(c.path, paths[i]) + i = i + 1 + + def test_parse_ns_headers(self): + # missing domain value (invalid cookie) + self.assertEqual( + parse_ns_headers(["foo=bar; path=/; domain"]), + [[("foo", "bar"), + ("path", "/"), ("domain", None), ("version", "0")]] + ) + # invalid expires value + self.assertEqual( + parse_ns_headers(["foo=bar; expires=Foo Bar 12 33:22:11 2000"]), + [[("foo", "bar"), ("expires", None), ("version", "0")]] + ) + # missing cookie value (valid cookie) + self.assertEqual( + parse_ns_headers(["foo"]), + [[("foo", None), ("version", "0")]] + ) + # shouldn't add version if header is empty + self.assertEqual(parse_ns_headers([""]), []) + + @skip26 + def test_bad_cookie_header(self): + + def cookiejar_from_cookie_headers(headers): + c = CookieJar() + req = urllib_request.Request("http://www.example.com/") + r = FakeResponse(headers, "http://www.example.com/") + c.extract_cookies(r, req) + return c + + # none of these bad headers should cause an exception to be raised + for headers in [ + ["Set-Cookie: "], # actually, nothing wrong with this + ["Set-Cookie2: "], # ditto + # missing domain value + ["Set-Cookie2: a=foo; path=/; Version=1; domain"], + # bad max-age + ["Set-Cookie: b=foo; max-age=oops"], + # bad version + ["Set-Cookie: b=foo; version=spam"], + ]: + c = cookiejar_from_cookie_headers(headers) + # these bad cookies shouldn't be set + self.assertEqual(len(c), 0) + + # cookie with invalid expires is treated as session cookie + headers = ["Set-Cookie: c=foo; expires=Foo Bar 12 33:22:11 2000"] + c = 
cookiejar_from_cookie_headers(headers) + cookie = c._cookies["www.example.com"]["/"]["c"] + self.assertIsNone(cookie.expires) + + +class LWPCookieTests(unittest.TestCase): + # Tests taken from libwww-perl, with a few modifications and additions. + + @expectedFailurePY26 + def test_netscape_example_1(self): + #------------------------------------------------------------------- + # First we check that it works for the original example at + # http://www.netscape.com/newsref/std/cookie_spec.html + + # Client requests a document, and receives in the response: + # + # Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/; expires=Wednesday, 09-Nov-99 23:12:40 GMT + # + # When client requests a URL in path "/" on this server, it sends: + # + # Cookie: CUSTOMER=WILE_E_COYOTE + # + # Client requests a document, and receives in the response: + # + # Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/ + # + # When client requests a URL in path "/" on this server, it sends: + # + # Cookie: CUSTOMER=WILE_E_COYOTE; PART_NUMBER=ROCKET_LAUNCHER_0001 + # + # Client receives: + # + # Set-Cookie: SHIPPING=FEDEX; path=/fo + # + # When client requests a URL in path "/" on this server, it sends: + # + # Cookie: CUSTOMER=WILE_E_COYOTE; PART_NUMBER=ROCKET_LAUNCHER_0001 + # + # When client requests a URL in path "/foo" on this server, it sends: + # + # Cookie: CUSTOMER=WILE_E_COYOTE; PART_NUMBER=ROCKET_LAUNCHER_0001; SHIPPING=FEDEX + # + # The last Cookie is buggy, because both specifications say that the + # most specific cookie must be sent first. SHIPPING=FEDEX is the + # most specific and should thus be first. 
+ + year_plus_one = time.localtime()[0] + 1 + + headers = [] + + c = CookieJar(DefaultCookiePolicy(rfc2965 = True)) + + #req = urllib_request.Request("http://1.1.1.1/", + # headers={"Host": "www.acme.com:80"}) + req = urllib_request.Request("http://www.acme.com:80/", + headers={"Host": "www.acme.com:80"}) + + headers.append( + "Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/ ; " + "expires=Wednesday, 09-Nov-%d 23:12:40 GMT" % year_plus_one) + res = FakeResponse(headers, "http://www.acme.com/") + c.extract_cookies(res, req) + + req = urllib_request.Request("http://www.acme.com/") + c.add_cookie_header(req) + + self.assertEqual(req.get_header("Cookie"), "CUSTOMER=WILE_E_COYOTE") + self.assertEqual(req.get_header("Cookie2"), '$Version="1"') + + headers.append("Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/") + res = FakeResponse(headers, "http://www.acme.com/") + c.extract_cookies(res, req) + + req = urllib_request.Request("http://www.acme.com/foo/bar") + c.add_cookie_header(req) + + h = req.get_header("Cookie") + self.assertIn("PART_NUMBER=ROCKET_LAUNCHER_0001", h) + self.assertIn("CUSTOMER=WILE_E_COYOTE", h) + + headers.append('Set-Cookie: SHIPPING=FEDEX; path=/foo') + res = FakeResponse(headers, "http://www.acme.com") + c.extract_cookies(res, req) + + req = urllib_request.Request("http://www.acme.com/") + c.add_cookie_header(req) + + h = req.get_header("Cookie") + self.assertIn("PART_NUMBER=ROCKET_LAUNCHER_0001", h) + self.assertIn("CUSTOMER=WILE_E_COYOTE", h) + self.assertNotIn("SHIPPING=FEDEX", h) + + req = urllib_request.Request("http://www.acme.com/foo/") + c.add_cookie_header(req) + + h = req.get_header("Cookie") + self.assertIn("PART_NUMBER=ROCKET_LAUNCHER_0001", h) + self.assertIn("CUSTOMER=WILE_E_COYOTE", h) + self.assertTrue(h.startswith("SHIPPING=FEDEX;")) + + @expectedFailurePY26 + def test_netscape_example_2(self): + # Second Example transaction sequence: + # + # Assume all mappings from above have been cleared. 
+ # + # Client receives: + # + # Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/ + # + # When client requests a URL in path "/" on this server, it sends: + # + # Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001 + # + # Client receives: + # + # Set-Cookie: PART_NUMBER=RIDING_ROCKET_0023; path=/ammo + # + # When client requests a URL in path "/ammo" on this server, it sends: + # + # Cookie: PART_NUMBER=RIDING_ROCKET_0023; PART_NUMBER=ROCKET_LAUNCHER_0001 + # + # NOTE: There are two name/value pairs named "PART_NUMBER" due to + # the inheritance of the "/" mapping in addition to the "/ammo" mapping. + + c = CookieJar() + headers = [] + + req = urllib_request.Request("http://www.acme.com/") + headers.append("Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/") + res = FakeResponse(headers, "http://www.acme.com/") + + c.extract_cookies(res, req) + + req = urllib_request.Request("http://www.acme.com/") + c.add_cookie_header(req) + + self.assertEqual(req.get_header("Cookie"), + "PART_NUMBER=ROCKET_LAUNCHER_0001") + + headers.append( + "Set-Cookie: PART_NUMBER=RIDING_ROCKET_0023; path=/ammo") + res = FakeResponse(headers, "http://www.acme.com/") + c.extract_cookies(res, req) + + req = urllib_request.Request("http://www.acme.com/ammo") + c.add_cookie_header(req) + + self.assertRegex(req.get_header("Cookie"), + r"PART_NUMBER=RIDING_ROCKET_0023;\s*" + "PART_NUMBER=ROCKET_LAUNCHER_0001") + + @expectedFailurePY26 + def test_ietf_example_1(self): + #------------------------------------------------------------------- + # Then we test with the examples from draft-ietf-http-state-man-mec-03.txt + # + # 5. EXAMPLES + + c = CookieJar(DefaultCookiePolicy(rfc2965=True)) + + # + # 5.1 Example 1 + # + # Most detail of request and response headers has been omitted. Assume + # the user agent has no stored cookies. + # + # 1. User Agent -> Server + # + # POST /acme/login HTTP/1.1 + # [form data] + # + # User identifies self via a form. + # + # 2. 
Server -> User Agent + # + # HTTP/1.1 200 OK + # Set-Cookie2: Customer="WILE_E_COYOTE"; Version="1"; Path="/acme" + # + # Cookie reflects user's identity. + + cookie = interact_2965( + c, 'http://www.acme.com/acme/login', + 'Customer="WILE_E_COYOTE"; Version="1"; Path="/acme"') + self.assertFalse(cookie) + + # + # 3. User Agent -> Server + # + # POST /acme/pickitem HTTP/1.1 + # Cookie: $Version="1"; Customer="WILE_E_COYOTE"; $Path="/acme" + # [form data] + # + # User selects an item for ``shopping basket.'' + # + # 4. Server -> User Agent + # + # HTTP/1.1 200 OK + # Set-Cookie2: Part_Number="Rocket_Launcher_0001"; Version="1"; + # Path="/acme" + # + # Shopping basket contains an item. + + cookie = interact_2965(c, 'http://www.acme.com/acme/pickitem', + 'Part_Number="Rocket_Launcher_0001"; ' + 'Version="1"; Path="/acme"'); + self.assertRegex(cookie, + r'^\$Version="?1"?; Customer="?WILE_E_COYOTE"?; \$Path="/acme"$') + + # + # 5. User Agent -> Server + # + # POST /acme/shipping HTTP/1.1 + # Cookie: $Version="1"; + # Customer="WILE_E_COYOTE"; $Path="/acme"; + # Part_Number="Rocket_Launcher_0001"; $Path="/acme" + # [form data] + # + # User selects shipping method from form. + # + # 6. Server -> User Agent + # + # HTTP/1.1 200 OK + # Set-Cookie2: Shipping="FedEx"; Version="1"; Path="/acme" + # + # New cookie reflects shipping method. + + cookie = interact_2965(c, "http://www.acme.com/acme/shipping", + 'Shipping="FedEx"; Version="1"; Path="/acme"') + + self.assertRegex(cookie, r'^\$Version="?1"?;') + self.assertRegex(cookie, r'Part_Number="?Rocket_Launcher_0001"?;' + '\s*\$Path="\/acme"') + self.assertRegex(cookie, r'Customer="?WILE_E_COYOTE"?;' + '\s*\$Path="\/acme"') + + # + # 7. User Agent -> Server + # + # POST /acme/process HTTP/1.1 + # Cookie: $Version="1"; + # Customer="WILE_E_COYOTE"; $Path="/acme"; + # Part_Number="Rocket_Launcher_0001"; $Path="/acme"; + # Shipping="FedEx"; $Path="/acme" + # [form data] + # + # User chooses to process order. + # + # 8. 
Server -> User Agent + # + # HTTP/1.1 200 OK + # + # Transaction is complete. + + cookie = interact_2965(c, "http://www.acme.com/acme/process") + self.assertRegex(cookie, r'Shipping="?FedEx"?;\s*\$Path="\/acme"') + self.assertIn("WILE_E_COYOTE", cookie) + + # + # The user agent makes a series of requests on the origin server, after + # each of which it receives a new cookie. All the cookies have the same + # Path attribute and (default) domain. Because the request URLs all have + # /acme as a prefix, and that matches the Path attribute, each request + # contains all the cookies received so far. + + @expectedFailurePY26 + def test_ietf_example_2(self): + # 5.2 Example 2 + # + # This example illustrates the effect of the Path attribute. All detail + # of request and response headers has been omitted. Assume the user agent + # has no stored cookies. + + c = CookieJar(DefaultCookiePolicy(rfc2965=True)) + + # Imagine the user agent has received, in response to earlier requests, + # the response headers + # + # Set-Cookie2: Part_Number="Rocket_Launcher_0001"; Version="1"; + # Path="/acme" + # + # and + # + # Set-Cookie2: Part_Number="Riding_Rocket_0023"; Version="1"; + # Path="/acme/ammo" + + interact_2965( + c, "http://www.acme.com/acme/ammo/specific", + 'Part_Number="Rocket_Launcher_0001"; Version="1"; Path="/acme"', + 'Part_Number="Riding_Rocket_0023"; Version="1"; Path="/acme/ammo"') + + # A subsequent request by the user agent to the (same) server for URLs of + # the form /acme/ammo/... would include the following request header: + # + # Cookie: $Version="1"; + # Part_Number="Riding_Rocket_0023"; $Path="/acme/ammo"; + # Part_Number="Rocket_Launcher_0001"; $Path="/acme" + # + # Note that the NAME=VALUE pair for the cookie with the more specific Path + # attribute, /acme/ammo, comes before the one with the less specific Path + # attribute, /acme. Further note that the same cookie name appears more + # than once. 
+ + cookie = interact_2965(c, "http://www.acme.com/acme/ammo/...") + self.assertRegex(cookie, r"Riding_Rocket_0023.*Rocket_Launcher_0001") + + # A subsequent request by the user agent to the (same) server for a URL of + # the form /acme/parts/ would include the following request header: + # + # Cookie: $Version="1"; Part_Number="Rocket_Launcher_0001"; $Path="/acme" + # + # Here, the second cookie's Path attribute /acme/ammo is not a prefix of + # the request URL, /acme/parts/, so the cookie does not get forwarded to + # the server. + + cookie = interact_2965(c, "http://www.acme.com/acme/parts/") + self.assertIn("Rocket_Launcher_0001", cookie) + self.assertNotIn("Riding_Rocket_0023", cookie) + + @expectedFailurePY26 + def test_rejection(self): + # Test rejection of Set-Cookie2 responses based on domain, path, port. + pol = DefaultCookiePolicy(rfc2965=True) + + c = LWPCookieJar(policy=pol) + + max_age = "max-age=3600" + + # illegal domain (no embedded dots) + cookie = interact_2965(c, "http://www.acme.com", + 'foo=bar; domain=".com"; version=1') + self.assertFalse(c) + + # legal domain + cookie = interact_2965(c, "http://www.acme.com", + 'ping=pong; domain="acme.com"; version=1') + self.assertEqual(len(c), 1) + + # illegal domain (host prefix "www.a" contains a dot) + cookie = interact_2965(c, "http://www.a.acme.com", + 'whiz=bang; domain="acme.com"; version=1') + self.assertEqual(len(c), 1) + + # legal domain + cookie = interact_2965(c, "http://www.a.acme.com", + 'wow=flutter; domain=".a.acme.com"; version=1') + self.assertEqual(len(c), 2) + + # can't partially match an IP-address + cookie = interact_2965(c, "http://125.125.125.125", + 'zzzz=ping; domain="125.125.125"; version=1') + self.assertEqual(len(c), 2) + + # illegal path (must be prefix of request path) + cookie = interact_2965(c, "http://www.sol.no", + 'blah=rhubarb; domain=".sol.no"; path="/foo"; ' + 'version=1') + self.assertEqual(len(c), 2) + + # legal path + cookie = interact_2965(c, 
"http://www.sol.no/foo/bar", + 'bing=bong; domain=".sol.no"; path="/foo"; ' + 'version=1') + self.assertEqual(len(c), 3) + + # illegal port (request-port not in list) + cookie = interact_2965(c, "http://www.sol.no", + 'whiz=ffft; domain=".sol.no"; port="90,100"; ' + 'version=1') + self.assertEqual(len(c), 3) + + # legal port + cookie = interact_2965( + c, "http://www.sol.no", + r'bang=wallop; version=1; domain=".sol.no"; ' + r'port="90,100, 80,8080"; ' + r'max-age=100; Comment = "Just kidding! (\"|\\\\) "') + self.assertEqual(len(c), 4) + + # port attribute without any value (current port) + cookie = interact_2965(c, "http://www.sol.no", + 'foo9=bar; version=1; domain=".sol.no"; port; ' + 'max-age=100;') + self.assertEqual(len(c), 5) + + # encoded path + # LWP has this test, but unescaping allowed path characters seems + # like a bad idea, so I think this should fail: +## cookie = interact_2965(c, "http://www.sol.no/foo/", +## r'foo8=bar; version=1; path="/%66oo"') + # but this is OK, because '<' is not an allowed HTTP URL path + # character: + cookie = interact_2965(c, "http://www.sol.no/>1. + # Worked by accident in Windows release build, but failed in debug build. + # Failed in all Linux builds. 
+ x = -1-sys.maxsize + self.assertEqual(x >> 1, x//2) + + self.assertRaises(ValueError, int, '123\0') + self.assertRaises(ValueError, int, '53', 40) + + # SF bug 1545497: embedded NULs were not detected with + # explicit base + self.assertRaises(ValueError, int, '123\0', 10) + self.assertRaises(ValueError, int, '123\x00 245', 20) + + x = int('1' * 600) + self.assertIsInstance(x, int) + + + self.assertRaises(TypeError, int, 1, 12) + + self.assertEqual(int('0o123', 0), 83) + self.assertEqual(int('0x123', 16), 291) + + # Bug 1679: "0x" is not a valid hex literal + self.assertRaises(ValueError, int, "0x", 16) + self.assertRaises(ValueError, int, "0x", 0) + + self.assertRaises(ValueError, int, "0o", 8) + self.assertRaises(ValueError, int, "0o", 0) + + self.assertRaises(ValueError, int, "0b", 2) + self.assertRaises(ValueError, int, "0b", 0) + + # SF bug 1334662: int(string, base) wrong answers + # Various representations of 2**32 evaluated to 0 + # rather than 2**32 in previous versions + + self.assertEqual(int('100000000000000000000000000000000', 2), 4294967296) + self.assertEqual(int('102002022201221111211', 3), 4294967296) + self.assertEqual(int('10000000000000000', 4), 4294967296) + self.assertEqual(int('32244002423141', 5), 4294967296) + self.assertEqual(int('1550104015504', 6), 4294967296) + self.assertEqual(int('211301422354', 7), 4294967296) + self.assertEqual(int('40000000000', 8), 4294967296) + self.assertEqual(int('12068657454', 9), 4294967296) + self.assertEqual(int('4294967296', 10), 4294967296) + self.assertEqual(int('1904440554', 11), 4294967296) + self.assertEqual(int('9ba461594', 12), 4294967296) + self.assertEqual(int('535a79889', 13), 4294967296) + self.assertEqual(int('2ca5b7464', 14), 4294967296) + self.assertEqual(int('1a20dcd81', 15), 4294967296) + self.assertEqual(int('100000000', 16), 4294967296) + self.assertEqual(int('a7ffda91', 17), 4294967296) + self.assertEqual(int('704he7g4', 18), 4294967296) + self.assertEqual(int('4f5aff66', 19), 
4294967296) + self.assertEqual(int('3723ai4g', 20), 4294967296) + self.assertEqual(int('281d55i4', 21), 4294967296) + self.assertEqual(int('1fj8b184', 22), 4294967296) + self.assertEqual(int('1606k7ic', 23), 4294967296) + self.assertEqual(int('mb994ag', 24), 4294967296) + self.assertEqual(int('hek2mgl', 25), 4294967296) + self.assertEqual(int('dnchbnm', 26), 4294967296) + self.assertEqual(int('b28jpdm', 27), 4294967296) + self.assertEqual(int('8pfgih4', 28), 4294967296) + self.assertEqual(int('76beigg', 29), 4294967296) + self.assertEqual(int('5qmcpqg', 30), 4294967296) + self.assertEqual(int('4q0jto4', 31), 4294967296) + self.assertEqual(int('4000000', 32), 4294967296) + self.assertEqual(int('3aokq94', 33), 4294967296) + self.assertEqual(int('2qhxjli', 34), 4294967296) + self.assertEqual(int('2br45qb', 35), 4294967296) + self.assertEqual(int('1z141z4', 36), 4294967296) + + # tests with base 0 + # this fails on 3.0, but in 2.x the old octal syntax is allowed + self.assertEqual(int(' 0o123 ', 0), 83) + self.assertEqual(int(' 0o123 ', 0), 83) + self.assertEqual(int('000', 0), 0) + self.assertEqual(int('0o123', 0), 83) + self.assertEqual(int('0x123', 0), 291) + self.assertEqual(int('0b100', 0), 4) + self.assertEqual(int(' 0O123 ', 0), 83) + self.assertEqual(int(' 0X123 ', 0), 291) + self.assertEqual(int(' 0B100 ', 0), 4) + + # without base still base 10 + self.assertEqual(int('0123'), 123) + self.assertEqual(int('0123', 10), 123) + + # tests with prefix and base != 0 + self.assertEqual(int('0x123', 16), 291) + self.assertEqual(int('0o123', 8), 83) + self.assertEqual(int('0b100', 2), 4) + self.assertEqual(int('0X123', 16), 291) + self.assertEqual(int('0O123', 8), 83) + self.assertEqual(int('0B100', 2), 4) + + # the code has special checks for the first character after the + # type prefix + self.assertRaises(ValueError, int, '0b2', 2) + self.assertRaises(ValueError, int, '0b02', 2) + self.assertRaises(ValueError, int, '0B2', 2) + self.assertRaises(ValueError, int, 
'0B02', 2) + self.assertRaises(ValueError, int, '0o8', 8) + self.assertRaises(ValueError, int, '0o08', 8) + self.assertRaises(ValueError, int, '0O8', 8) + self.assertRaises(ValueError, int, '0O08', 8) + self.assertRaises(ValueError, int, '0xg', 16) + self.assertRaises(ValueError, int, '0x0g', 16) + self.assertRaises(ValueError, int, '0Xg', 16) + self.assertRaises(ValueError, int, '0X0g', 16) + + # SF bug 1334662: int(string, base) wrong answers + # Checks for proper evaluation of 2**32 + 1 + self.assertEqual(int('100000000000000000000000000000001', 2), 4294967297) + self.assertEqual(int('102002022201221111212', 3), 4294967297) + self.assertEqual(int('10000000000000001', 4), 4294967297) + self.assertEqual(int('32244002423142', 5), 4294967297) + self.assertEqual(int('1550104015505', 6), 4294967297) + self.assertEqual(int('211301422355', 7), 4294967297) + self.assertEqual(int('40000000001', 8), 4294967297) + self.assertEqual(int('12068657455', 9), 4294967297) + self.assertEqual(int('4294967297', 10), 4294967297) + self.assertEqual(int('1904440555', 11), 4294967297) + self.assertEqual(int('9ba461595', 12), 4294967297) + self.assertEqual(int('535a7988a', 13), 4294967297) + self.assertEqual(int('2ca5b7465', 14), 4294967297) + self.assertEqual(int('1a20dcd82', 15), 4294967297) + self.assertEqual(int('100000001', 16), 4294967297) + self.assertEqual(int('a7ffda92', 17), 4294967297) + self.assertEqual(int('704he7g5', 18), 4294967297) + self.assertEqual(int('4f5aff67', 19), 4294967297) + self.assertEqual(int('3723ai4h', 20), 4294967297) + self.assertEqual(int('281d55i5', 21), 4294967297) + self.assertEqual(int('1fj8b185', 22), 4294967297) + self.assertEqual(int('1606k7id', 23), 4294967297) + self.assertEqual(int('mb994ah', 24), 4294967297) + self.assertEqual(int('hek2mgm', 25), 4294967297) + self.assertEqual(int('dnchbnn', 26), 4294967297) + self.assertEqual(int('b28jpdn', 27), 4294967297) + self.assertEqual(int('8pfgih5', 28), 4294967297) + self.assertEqual(int('76beigh', 
29), 4294967297) + self.assertEqual(int('5qmcpqh', 30), 4294967297) + self.assertEqual(int('4q0jto5', 31), 4294967297) + self.assertEqual(int('4000001', 32), 4294967297) + self.assertEqual(int('3aokq95', 33), 4294967297) + self.assertEqual(int('2qhxjlj', 34), 4294967297) + self.assertEqual(int('2br45qc', 35), 4294967297) + self.assertEqual(int('1z141z5', 36), 4294967297) + + @expectedFailurePY2 # fails on Py2 + @cpython_only + def test_small_ints(self): + # Bug #3236: Return small longs from PyLong_FromString + self.assertIs(int('10'), 10) + self.assertIs(int('-1'), -1) + self.assertIs(int(b'10'), 10) + self.assertIs(int(b'-1'), -1) + + def test_no_args(self): + self.assertEqual(int(), 0) + + @unittest.skipIf(sys.version_info >= (3, 7), + "The first parameter must be positional with Python >= 3.7" + ) + def test_x_keyword_arg(self): + # Test invoking int() using keyword arguments. + self.assertEqual(int(x=1.2), 1) + self.assertEqual(int(x='100', base=2), 4) + + def text_base_keyword_arg(self): + self.assertEqual(int('100', base=2), 4) + + def test_newint_plus_float(self): + minutes = int(100) + second = 0.0 + seconds = minutes*60 + second + self.assertEqual(seconds, 6000) + self.assertTrue(isinstance(seconds, float)) + + @expectedFailurePY2 + def test_keyword_args_2(self): + # newint causes these to fail: + self.assertRaises(TypeError, int, base=10) + self.assertRaises(TypeError, int, base=0) + + def test_non_numeric_input_types(self): + # Test possible non-numeric types for the argument x, including + # subclasses of the explicitly documented accepted types. 
+ class CustomStr(str): pass + class CustomBytes(bytes): pass + class CustomByteArray(bytearray): pass + + values = [b'100', + bytearray(b'100'), + CustomStr('100'), + CustomBytes(b'100'), + CustomByteArray(b'100')] + + for x in values: + msg = 'x has type %s' % type(x).__name__ + self.assertEqual(int(x), 100, msg=msg) + self.assertEqual(int(x, 2), 4, msg=msg) + + def test_newint_of_newstr(self): + a = str(u'123') + b = int(a) + self.assertEqual(b, 123) + self.assertTrue(isinstance(b, int)) + + def test_string_float(self): + self.assertRaises(ValueError, int, '1.2') + + def test_intconversion(self): + # Test __int__() + class ClassicMissingMethods: + pass + # The following raises an AttributeError (for '__trunc__') on Py2 + # but a TypeError on Py3 (which uses new-style classes). + # Perhaps nothing is to be done but avoiding old-style classes! + # ... + # self.assertRaises(TypeError, int, ClassicMissingMethods()) + + class MissingMethods(object): + pass + self.assertRaises(TypeError, int, MissingMethods()) + + class Foo0: + def __int__(self): + return 42 + + class Foo1(object): + def __int__(self): + return 42 + + class Foo2(int): + def __int__(self): + return 42 + + class Foo3(int): + def __int__(self): + return self.real + + class Foo4(int): + def __int__(self): + return 42 + + class Foo5(int): + def __int__(self): + return 42. 
+ + self.assertEqual(int(Foo0()), 42) + self.assertEqual(int(Foo1()), 42) + self.assertEqual(int(Foo2()), 42) + self.assertEqual(int(Foo3()), 0) + self.assertEqual(int(Foo4()), 42) + self.assertRaises(TypeError, int, Foo5()) + + class Classic: + pass + for base in (object, Classic): + class IntOverridesTrunc(base): + def __int__(self): + return 42 + def __trunc__(self): + return -12 + self.assertEqual(int(IntOverridesTrunc()), 42) + + class JustTrunc(base): + def __trunc__(self): + return 42 + # This fails on Python 2.x: + # if not PY26: + # self.assertEqual(int(JustTrunc()), 42) + + for trunc_result_base in (object, Classic): + class Integral(trunc_result_base): + def __int__(self): + return 42 + + class TruncReturnsNonInt(base): + def __trunc__(self): + return Integral() + # Fails on Python 2.6: + # self.assertEqual(int(TruncReturnsNonInt()), 42) + + class NonIntegral(trunc_result_base): + def __trunc__(self): + # Check that we avoid infinite recursion. + return NonIntegral() + + class TruncReturnsNonIntegral(base): + def __trunc__(self): + return NonIntegral() + try: + int(TruncReturnsNonIntegral()) + except TypeError as e: + # self.assertEqual(str(e), + # "__trunc__ returned non-Integral" + # " (type NonIntegral)") + pass + else: + self.fail("Failed to raise TypeError with %s" % + ((base, trunc_result_base),)) + + # Regression test for bugs.python.org/issue16060. + class BadInt(trunc_result_base): + def __int__(self): + return 42.0 + + class TruncReturnsBadInt(base): + def __trunc__(self): + return BadInt() + + with self.assertRaises(TypeError): + int(TruncReturnsBadInt()) + + #################################################################### + # future-specific tests are below: + #################################################################### + + # Exception messages in Py2 are 8-bit strings. The following fails, + # even if the testlist strings are wrapped in str() calls... 
+ @expectedFailurePY2 + def test_error_message(self): + testlist = ('\xbd', '123\xbd', ' 123 456 ') + for s in testlist: + try: + int(s) + except ValueError as e: + self.assertIn(s.strip(), e.args[0]) + else: + self.fail("Expected int(%r) to raise a ValueError", s) + + def test_bytes_mul(self): + self.assertEqual(b'\x00' * int(5), b'\x00' * 5) + self.assertEqual(bytes(b'\x00') * int(5), bytes(b'\x00') * 5) + + def test_str_mul(self): + self.assertEqual(u'\x00' * int(5), u'\x00' * 5) + self.assertEqual(str(u'\x00') * int(5), str(u'\x00') * 5) + + def test_int_bytes(self): + self.assertEqual(int(b'a\r\n', 16), 10) + self.assertEqual(int(bytes(b'a\r\n'), 16), 10) + + def test_divmod(self): + """ + Test int.__divmod__ + """ + vals = [10**i for i in range(0, 20)] + for i in range(200): + x = random.choice(vals) + y = random.choice(vals) + assert divmod(int(x), int(y)) == divmod(x, y) + assert divmod(int(-x), int(y)) == divmod(-x, y) + assert divmod(int(x), int(-y)) == divmod(x, -y) + assert divmod(int(-x), int(-y)) == divmod(-x, -y) + + assert divmod(int(x), float(y)) == divmod(x, float(y)) + assert divmod(int(-x), float(y)) == divmod(-x, float(y)) + assert divmod(int(x), float(-y)) == divmod(x, float(-y)) + assert divmod(int(-x), float(-y)) == divmod(-x, float(-y)) + + def _frange(x, y, step): + _x = x ; i = 0 + while _x < y: + yield _x + i += 1 ; _x = x + i * step + + for i in range(20): + for d in _frange(0.005, 5.0, 0.005): + self.assertEqual(divmod(int(i), d), divmod(i, d), msg='i={0}; d={1}'.format(i, d)) + self.assertEqual(divmod(int(-i), d), divmod(-i, d), msg='i={0}; d={1}'.format(i, d)) + self.assertEqual(divmod(int(i), -d), divmod(i, -d), msg='i={0}; d={1}'.format(i, d)) + self.assertEqual(divmod(int(-i), -d), divmod(-i, -d), msg='i={0}; d={1}'.format(i, d)) + + def test_div(self): + """ + Issue #38 + """ + a = int(3) + self.assertEqual(a / 5., 0.6) + self.assertEqual(a / 5, 0.6) # the __future__.division import is in + # effect + + def test_truediv(self): + 
""" + Test int.__truediv__ and friends (rtruediv, itruediv) + """ + a = int(3) + self.assertEqual(a / 2, 1.5) # since "from __future__ import division" + # is in effect + self.assertEqual(type(a / 2), float) + + b = int(2) + self.assertEqual(a / b, 1.5) # since "from __future__ import division" + # is in effect + self.assertEqual(type(a / b), float) + + c = int(3) / b + self.assertEqual(c, 1.5) + self.assertTrue(isinstance(c, float)) + + d = int(5) + d /= 5 + self.assertEqual(d, 1.0) + self.assertTrue(isinstance(d, float)) + + e = int(10) + f = int(20) + e /= f + self.assertEqual(e, 0.5) + self.assertTrue(isinstance(e, float)) + + + def test_idiv(self): + a = int(3) + a /= 2 + self.assertEqual(a, 1.5) + self.assertTrue(isinstance(a, float)) + b = int(10) + b /= 2 + self.assertEqual(b, 5.0) + self.assertTrue(isinstance(b, float)) + c = int(-3) + c /= 2.0 + self.assertEqual(c, -1.5) + self.assertTrue(isinstance(c, float)) + + def test_floordiv(self): + a = int(3) + self.assertEqual(a // 2, 1) + self.assertEqual(type(a // 2), int) # i.e. another newint + self.assertTrue(isinstance(a // 2, int)) + + b = int(2) + self.assertEqual(a // b, 1) + self.assertEqual(type(a // b), int) # i.e. another newint + self.assertTrue(isinstance(a // b, int)) + + c = 3 // b + self.assertEqual(c, 1) + self.assertEqual(type(c), int) # i.e. another newint + self.assertTrue(isinstance(c, int)) + + d = int(5) + d //= 5 + self.assertEqual(d, 1) + self.assertEqual(type(d), int) # i.e. another newint + self.assertTrue(isinstance(d, int)) + + e = int(10) + f = int(20) + e //= f + self.assertEqual(e, 0) + self.assertEqual(type(e), int) # i.e. 
another newint + self.assertTrue(isinstance(e, int)) + + + def test_div(self): + """ + Issue #38 + """ + a = int(3) + self.assertEqual(a / 5., 0.6) + self.assertEqual(a / 5, 0.6) # the __future__.division import is in + # effect + + def test_truediv(self): + """ + Test int.__truediv__ and friends (rtruediv, itruediv) + """ + a = int(3) + self.assertEqual(a / 2, 1.5) # since "from __future__ import division" + # is in effect + self.assertEqual(type(a / 2), float) + + b = int(2) + self.assertEqual(a / b, 1.5) # since "from __future__ import division" + # is in effect + self.assertEqual(type(a / b), float) + + c = int(3) / b + self.assertEqual(c, 1.5) + self.assertTrue(isinstance(c, float)) + + d = int(5) + d /= 5 + self.assertEqual(d, 1.0) + self.assertTrue(isinstance(d, float)) + + e = int(10) + f = int(20) + e /= f + self.assertEqual(e, 0.5) + self.assertTrue(isinstance(e, float)) + + + def test_idiv(self): + a = int(3) + a /= 2 + self.assertEqual(a, 1.5) + self.assertTrue(isinstance(a, float)) + b = int(10) + b /= 2 + self.assertEqual(b, 5.0) + self.assertTrue(isinstance(b, float)) + c = int(-3) + c /= 2.0 + self.assertEqual(c, -1.5) + self.assertTrue(isinstance(c, float)) + + + def test_floordiv(self): + a = int(3) + self.assertEqual(a // 2, 1) + self.assertEqual(type(a // 2), int) # i.e. another newint + self.assertTrue(isinstance(a // 2, int)) + + b = int(2) + self.assertEqual(a // b, 1) + self.assertEqual(type(a // b), int) # i.e. another newint + self.assertTrue(isinstance(a // b, int)) + + c = 3 // b + self.assertEqual(c, 1) + self.assertEqual(type(c), int) # i.e. another newint + self.assertTrue(isinstance(c, int)) + + d = int(5) + d //= 5 + self.assertEqual(d, 1) + self.assertEqual(type(d), int) # i.e. another newint + self.assertTrue(isinstance(d, int)) + + e = int(10) + f = int(20) + e //= f + self.assertEqual(e, 0) + self.assertEqual(type(e), int) # i.e. 
another newint + self.assertTrue(isinstance(e, int)) + + @unittest.skipIf(np is None, "test requires NumPy") + @unittest.expectedFailure + def test_numpy_cast_as_long_and_newint(self): + """ + NumPy currently doesn't like subclasses of ``long``. This should be fixed. + """ + class longsubclass(long): + pass + + a = np.arange(10**3, dtype=np.float64).reshape(10, 100) + b = a.astype(longsubclass) + c = a.astype(int) + print(b.dtype) + assert b.dtype == np.int64 == c.dtype + + def test_upcasting_to_floats(self): + """ + Integers should automatically be upcasted to floats for arithmetic + operations. + """ + a = int(3) + + # Addition with floats. + self.assertEqual(a + 0.5, 3.5) + self.assertEqual(0.5 + a, 3.5) + self.assertTrue(isinstance(a + 0.5, float)) + self.assertTrue(isinstance(0.5 + a, float)) + + # Subtraction with floats. + self.assertEqual(a - 0.5, 2.5) + self.assertEqual(0.5 - a, -2.5) + self.assertTrue(isinstance(a - 0.5, float)) + self.assertTrue(isinstance(0.5 - a, float)) + + # Multiplication with floats. + self.assertEqual(a * 0.5, 1.5) + self.assertEqual(0.5 * a, 1.5) + self.assertTrue(isinstance(a * 0.5, float)) + self.assertTrue(isinstance(0.5 * a, float)) + + # Division with floats. + self.assertEqual(a / 0.5, 6.0) + self.assertEqual(0.5 / a, 0.5 / 3.0) + self.assertTrue(isinstance(a / 0.5, float)) + self.assertTrue(isinstance(0.5 / a, float)) + + # Modulo with floats. + self.assertEqual(a % 0.5, 0.0) + self.assertEqual(0.5 % a, 0.5) + self.assertTrue(isinstance(a % 0.5, float)) + self.assertTrue(isinstance(0.5 % a, float)) + + # Power with floats. + self.assertEqual(1.0 ** a, 1.0) + self.assertTrue(isinstance(1.0 ** a, float)) + + self.assertEqual(a ** 1.0, a) + self.assertTrue(isinstance(a ** 1.0, float)) + + def test_upcasting_to_complex(self): + """ + Integers should automatically be upcasted to complex numbers for + arithmetic operations. + + Python 3 cannot mod complex numbers so this does not have to be + supported here. 
+ """ + a = int(3) + + # Addition with complex. + self.assertEqual(a + 0.5j, 3.0 + 0.5j) + self.assertEqual(0.5j + a, 3.0 + 0.5j) + self.assertTrue(isinstance(a + 0.5j, complex)) + self.assertTrue(isinstance(0.5j + a, complex)) + + # Subtraction with complex. + self.assertEqual(a - 0.5j, 3.0 - 0.5j) + self.assertEqual(0.5j - a, -3.0 + 0.5j) + self.assertTrue(isinstance(a - 0.5j, complex)) + self.assertTrue(isinstance(0.5j - a, complex)) + + # Multiplication with complex. + self.assertEqual(a * 0.5j, 1.5j) + self.assertEqual(0.5j * a, 1.5j) + self.assertTrue(isinstance(a * 0.5j, complex)) + self.assertTrue(isinstance(0.5j * a, complex)) + + # Division with complex. + self.assertEqual(a / 0.5j, -6.0j) + self.assertEqual(0.5j / a, (0.5 / 3.0) * 1j) + self.assertTrue(isinstance(a / 0.5j, complex)) + self.assertTrue(isinstance(0.5j / a, complex)) + + # Power with floats. + self.assertEqual(5.0j ** int(1), 5.0j) + self.assertTrue(isinstance(5.0j ** int(1), complex)) + + self.assertEqual(a ** 1.0j, 3.0 ** 1.0j) + self.assertTrue(isinstance(a ** 1.0j, complex)) + + def test_more_arithmetics(self): + """ + More arithmetic tests to improve test coverage. + """ + a = int(3) + b = int(5) + c = int(-5) + + self.assertEqual(b - a, 2) + self.assertTrue(isinstance(b - a, int)) + + self.assertEqual(a * b, 15) + self.assertTrue(isinstance(a * b, int)) + + self.assertEqual(b % a, 2) + self.assertTrue(isinstance(b % a, int)) + + self.assertEqual(a ** b, 243) + self.assertTrue(isinstance(a ** b, int)) + + self.assertEqual(abs(c), 5) + self.assertEqual(abs(c), b) + self.assertTrue(isinstance(abs(c), int)) + + def test_bitwise_operations(self): + """ + Tests bitwise operations. 
+ """ + a = int(3) + b = int(1) + + self.assertEqual(a >> b, 1) + self.assertEqual(a >> 1, 1) + self.assertTrue(isinstance(a >> b, int)) + self.assertTrue(isinstance(a >> 1, int)) + + self.assertEqual(a << b, 6) + self.assertEqual(a << 1, 6) + self.assertTrue(isinstance(a << b, int)) + self.assertTrue(isinstance(a << 1, int)) + + self.assertEqual(a & b, 1) + self.assertEqual(a & 1, 1) + self.assertTrue(isinstance(a & b, int)) + self.assertTrue(isinstance(a & 1, int)) + + self.assertEqual(a | b, 3) + self.assertEqual(a | 1, 3) + self.assertTrue(isinstance(a | b, int)) + self.assertTrue(isinstance(a | 1, int)) + + self.assertEqual(a ^ b, 2) + self.assertEqual(a ^ 1, 2) + self.assertTrue(isinstance(a ^ b, int)) + self.assertTrue(isinstance(a ^ 1, int)) + + self.assertEqual(~a, -4) + self.assertTrue(isinstance(~a, int)) + + def test_unary_operators(self): + a = int(3) + b = int(-3) + + self.assertEqual(+a, a) + self.assertEqual(+a, 3) + self.assertEqual(+b, b) + self.assertEqual(+b, -3) + self.assertTrue(isinstance(+a, int)) + + self.assertEqual(-a, b) + self.assertEqual(-a, -3) + self.assertEqual(-b, a) + self.assertEqual(-b, 3) + self.assertTrue(isinstance(-a, int)) + + def test_to_bytes(self): + def check(tests, byteorder, signed=False): + for test, expected in tests.items(): + try: + self.assertEqual( + int(test).to_bytes(len(expected), byteorder, signed=signed), + expected) + except Exception as err: + raise_from(AssertionError( + "failed to convert {0} with byteorder={1} and signed={2}" + .format(test, byteorder, signed)), err) + + # Convert integers to signed big-endian byte arrays. 
+ tests1 = { + 0: bytes(b'\x00'), + 1: bytes(b'\x01'), + -1: bytes(b'\xff'), + -127: bytes(b'\x81'), + -128: bytes(b'\x80'), + -129: bytes(b'\xff\x7f'), + 127: bytes(b'\x7f'), + 129: bytes(b'\x00\x81'), + -255: bytes(b'\xff\x01'), + -256: bytes(b'\xff\x00'), + 255: bytes(b'\x00\xff'), + 256: bytes(b'\x01\x00'), + 32767: bytes(b'\x7f\xff'), + -32768: bytes(b'\xff\x80\x00'), + 65535: bytes(b'\x00\xff\xff'), + -65536: bytes(b'\xff\x00\x00'), + -8388608: bytes(b'\x80\x00\x00') + } + check(tests1, 'big', signed=True) + + # Convert integers to signed little-endian byte arrays. + tests2 = { + 0: bytes(b'\x00'), + 1: bytes(b'\x01'), + -1: bytes(b'\xff'), + -127: bytes(b'\x81'), + -128: bytes(b'\x80'), + -129: bytes(b'\x7f\xff'), + 127: bytes(b'\x7f'), + 129: bytes(b'\x81\x00'), + -255: bytes(b'\x01\xff'), + -256: bytes(b'\x00\xff'), + 255: bytes(b'\xff\x00'), + 256: bytes(b'\x00\x01'), + 32767: bytes(b'\xff\x7f'), + -32768: bytes(b'\x00\x80'), + 65535: bytes(b'\xff\xff\x00'), + -65536: bytes(b'\x00\x00\xff'), + -8388608: bytes(b'\x00\x00\x80') + } + check(tests2, 'little', signed=True) + + # Convert integers to unsigned big-endian byte arrays. + tests3 = { + 0: bytes(b'\x00'), + 1: bytes(b'\x01'), + 127: bytes(b'\x7f'), + 128: bytes(b'\x80'), + 255: bytes(b'\xff'), + 256: bytes(b'\x01\x00'), + 32767: bytes(b'\x7f\xff'), + 32768: bytes(b'\x80\x00'), + 65535: bytes(b'\xff\xff'), + 65536: bytes(b'\x01\x00\x00') + } + check(tests3, 'big', signed=False) + + # Convert integers to unsigned little-endian byte arrays. 
+ tests4 = { + 0: bytes(b'\x00'), + 1: bytes(b'\x01'), + 127: bytes(b'\x7f'), + 128: bytes(b'\x80'), + 255: bytes(b'\xff'), + 256: bytes(b'\x00\x01'), + 32767: bytes(b'\xff\x7f'), + 32768: bytes(b'\x00\x80'), + 65535: bytes(b'\xff\xff'), + 65536: bytes(b'\x00\x00\x01') + } + check(tests4, 'little', signed=False) + + self.assertRaises(OverflowError, int(256).to_bytes, 1, 'big', signed=False) + self.assertRaises(OverflowError, int(256).to_bytes, 1, 'big', signed=True) + self.assertRaises(OverflowError, int(256).to_bytes, 1, 'little', signed=False) + self.assertRaises(OverflowError, int(256).to_bytes, 1, 'little', signed=True) + self.assertRaises(OverflowError, int(-1).to_bytes, 2, 'big', signed=False), + self.assertRaises(OverflowError, int(-1).to_bytes, 2, 'little', signed=False) + self.assertEqual(int(0).to_bytes(0, 'big'), b'') + self.assertEqual(int(1).to_bytes(5, 'big'), b'\x00\x00\x00\x00\x01') + self.assertEqual(int(0).to_bytes(5, 'big'), b'\x00\x00\x00\x00\x00') + self.assertEqual(int(-1).to_bytes(5, 'big', signed=True), + bytes(b'\xff\xff\xff\xff\xff')) + self.assertRaises(OverflowError, int(1).to_bytes, 0, 'big') + + def test_from_bytes(self): + def check(tests, byteorder, signed=False): + for test, expected in tests.items(): + try: + self.assertEqual( + int.from_bytes(test, byteorder, signed=signed), + int(expected)) + except Exception as err: + raise_from(AssertionError( + "failed to convert {0} with byteorder={1!r} and signed={2}" + .format(test, byteorder, signed)), err) + + # Convert signed big-endian byte arrays to integers. 
+ tests1 = { + bytes(b''): 0, + bytes(b'\x00'): 0, + bytes(b'\x00\x00'): 0, + bytes(b'\x01'): 1, + bytes(b'\x00\x01'): 1, + bytes(b'\xff'): -1, + bytes(b'\xff\xff'): -1, + bytes(b'\x81'): -127, + bytes(b'\x80'): -128, + bytes(b'\xff\x7f'): -129, + bytes(b'\x7f'): 127, + bytes(b'\x00\x81'): 129, + bytes(b'\xff\x01'): -255, + bytes(b'\xff\x00'): -256, + bytes(b'\x00\xff'): 255, + bytes(b'\x01\x00'): 256, + bytes(b'\x7f\xff'): 32767, + bytes(b'\x80\x00'): -32768, + bytes(b'\x00\xff\xff'): 65535, + bytes(b'\xff\x00\x00'): -65536, + bytes(b'\x80\x00\x00'): -8388608 + } + # check(tests1, 'big', signed=True) + + # Convert signed little-endian byte arrays to integers. + tests2 = { + bytes(b''): 0, + bytes(b'\x00'): 0, + bytes(b'\x00\x00'): 0, + bytes(b'\x01'): 1, + bytes(b'\x00\x01'): 256, + bytes(b'\xff'): -1, + bytes(b'\xff\xff'): -1, + bytes(b'\x81'): -127, + bytes(b'\x80'): -128, + bytes(b'\x7f\xff'): -129, + bytes(b'\x7f'): 127, + bytes(b'\x81\x00'): 129, + bytes(b'\x01\xff'): -255, + bytes(b'\x00\xff'): -256, + bytes(b'\xff\x00'): 255, + bytes(b'\x00\x01'): 256, + bytes(b'\xff\x7f'): 32767, + bytes(b'\x00\x80'): -32768, + bytes(b'\xff\xff\x00'): 65535, + bytes(b'\x00\x00\xff'): -65536, + bytes(b'\x00\x00\x80'): -8388608 + } + # check(tests2, 'little', signed=True) + + # Convert unsigned big-endian byte arrays to integers. + tests3 = { + bytes(b''): 0, + bytes(b'\x00'): 0, + bytes(b'\x01'): 1, + bytes(b'\x7f'): 127, + bytes(b'\x80'): 128, + bytes(b'\xff'): 255, + bytes(b'\x01\x00'): 256, + bytes(b'\x7f\xff'): 32767, + bytes(b'\x80\x00'): 32768, + bytes(b'\xff\xff'): 65535, + bytes(b'\x01\x00\x00'): 65536, + } + check(tests3, 'big', signed=False) + + # Convert integers to unsigned little-endian byte arrays. 
+ tests4 = { + bytes(b''): 0, + bytes(b'\x00'): 0, + bytes(b'\x01'): 1, + bytes(b'\x7f'): 127, + bytes(b'\x80'): 128, + bytes(b'\xff'): 255, + bytes(b'\x00\x01'): 256, + bytes(b'\xff\x7f'): 32767, + bytes(b'\x00\x80'): 32768, + bytes(b'\xff\xff'): 65535, + bytes(b'\x00\x00\x01'): 65536, + } + check(tests4, 'little', signed=False) + + class myint(int): + pass + + if PY2: + import __builtin__ + oldbytes = __builtin__.bytes + types = (bytes, oldbytes) + else: + types = (bytes,) + for mytype in types: + self.assertIs(type(myint.from_bytes(mytype(b'\x00'), 'big')), myint) + self.assertEqual(myint.from_bytes(mytype(b'\x01'), 'big'), 1) + self.assertIs( + type(myint.from_bytes(mytype(b'\x00'), 'big', signed=False)), myint) + self.assertEqual(myint.from_bytes(mytype(b'\x01'), 'big', signed=False), 1) + self.assertIs(type(myint.from_bytes(mytype(b'\x00'), 'little')), myint) + self.assertEqual(myint.from_bytes(mytype(b'\x01'), 'little'), 1) + self.assertIs(type(myint.from_bytes( + mytype(b'\x00'), 'little', signed=False)), myint) + self.assertEqual(myint.from_bytes(mytype(b'\x01'), 'little', signed=False), 1) + # self.assertEqual( + # int.from_bytes([255, 0, 0], 'big', signed=True), -65536) + # self.assertEqual( + # int.from_bytes((255, 0, 0), 'big', signed=True), -65536) + # self.assertEqual(int.from_bytes( + # bytearray(mytype(b'\xff\x00\x00')), 'big', signed=True), -65536) + # self.assertEqual(int.from_bytes( + # bytearray(mytype(b'\xff\x00\x00')), 'big', signed=True), -65536) + # self.assertEqual(int.from_bytes( + # array.array('B', mytype(b'\xff\x00\x00')), 'big', signed=True), -65536) + # self.assertEqual(int.from_bytes( + # memoryview(mytype(b'\xff\x00\x00')), 'big', signed=True), -65536) + + self.assertRaises(TypeError, int.from_bytes, u"", 'big') + self.assertRaises(TypeError, int.from_bytes, u"\x00", 'big') + self.assertRaises(TypeError, myint.from_bytes, u"", 'big') + self.assertRaises(TypeError, myint.from_bytes, u"\x00", 'big') + + types = (int, lambda x: x) if 
PY2 else (lambda x: x,) + for mytype in types: + self.assertRaises(ValueError, int.from_bytes, [mytype(256)], 'big') + self.assertRaises(ValueError, int.from_bytes, [mytype(0)], 'big\x00') + self.assertRaises(ValueError, int.from_bytes, [mytype(0)], 'little\x00') + self.assertRaises(TypeError, int.from_bytes, mytype(0), 'big') + # self.assertRaises(TypeError, int.from_bytes, mytype(0), 'big', True) + self.assertRaises(TypeError, myint.from_bytes, mytype(0), 'big') + # self.assertRaises(TypeError, int.from_bytes, mytype(0), 'big', True) + + @expectedFailurePY2 + def test_multiple_inheritance(self): + """ + Issue #96 (for newint instead of newobject) + """ + import collections.abc + + class Base(int): + pass + + class Foo(Base, collections.abc.Container): + def __add__(self, other): + return 0 + + @expectedFailurePY2 + def test_with_metaclass_and_int(self): + """ + Issue #91 (for newint instead of newobject) + """ + from future.utils import with_metaclass + + class MetaClass(type): + pass + + class TestClass(with_metaclass(MetaClass, int)): + pass + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_future/test_int_old_division.py b/tests/test_future/test_int_old_division.py new file mode 100644 index 00000000..be6fdedc --- /dev/null +++ b/tests/test_future/test_int_old_division.py @@ -0,0 +1,101 @@ +""" +Py2 only. int tests involving division for the case that: + + >>> from __future__ import division + +is not in effect. 
+""" + +from __future__ import (absolute_import, + print_function, unicode_literals) +from future import standard_library +from future.builtins import * +from future.tests.base import unittest +from future.utils import PY2 + +import sys +import random + + +@unittest.skipIf(not PY2, 'old division tests only for Py2') +class IntTestCasesOldDivision(unittest.TestCase): + + def setUp(self): + self.longMessage = True + + + def test_div(self): + """ + Issue #38 + """ + a = int(3) + self.assertEqual(a / 5., 0.6) + self.assertEqual(a / 5, 0) + + + def test_idiv(self): + a = int(3) + a /= 2 + self.assertEqual(a, 1) + self.assertTrue(isinstance(a, int)) + b = int(10) + b /= 2 + self.assertEqual(b, 5) + self.assertTrue(isinstance(b, int)) + c = int(-3) + c /= 2.0 + self.assertEqual(c, -1.5) + self.assertTrue(isinstance(c, float)) + + + def test_truediv(self): + """ + Test int.__truediv__ and friends (rtruediv, itruediv) + """ + a = int(3) + self.assertEqual(a / 2, 1) # since "from __future__ import division" + # is in effect + self.assertEqual(type(a / 2), int) + + b = int(2) + self.assertEqual(a / b, 1) # since "from __future__ import division" + # is in effect + self.assertEqual(type(a / b), int) + + c = int(3) / b + self.assertEqual(c, 1) + self.assertTrue(isinstance(c, int)) + + d = int(5) + d /= 5 + self.assertEqual(d, 1) + self.assertTrue(isinstance(d, int)) + + e = int(10) + f = int(20) + e /= f + self.assertEqual(e, 0) + self.assertTrue(isinstance(e, int)) + + + def test_divmod(self): + """ + Test int.__divmod__ + """ + vals = [10**i for i in range(0, 20)] + for i in range(200): + x = random.choice(vals) + y = random.choice(vals) + self.assertEqual(int(y).__rdivmod__(int(x)), divmod(x, y), msg='x={0}; y={1}'.format(x, y)) + self.assertEqual(int(-y).__rdivmod__(int(x)), divmod(x, -y), msg='x={0}; y={1}'.format(x, y)) + self.assertEqual(int(y).__rdivmod__(int(-x)), divmod(-x, y), msg='x={0}; y={1}'.format(x, y)) + self.assertEqual(int(-y).__rdivmod__(int(-x)), 
divmod(-x, -y), msg='x={0}; y={1}'.format(x, y)) + + self.assertEqual(int(x).__rdivmod__(int(y)), long(x).__rdivmod__(y), msg='x={0}; y={1}'.format(x, y)) + self.assertEqual(int(-x).__rdivmod__(int(y)), long(-x).__rdivmod__(y), msg='x={0}; y={1}'.format(x, y)) + self.assertEqual(int(x).__rdivmod__(int(-y)), long(x).__rdivmod__(-y), msg='x={0}; y={1}'.format(x, y)) + self.assertEqual(int(-x).__rdivmod__(int(-y)), long(-x).__rdivmod__(-y), msg='x={0}; y={1}'.format(x, y)) + + +if __name__ == "__main__": + unittest.main() diff --git a/future/tests/test_isinstance.py b/tests/test_future/test_isinstance.py similarity index 97% rename from future/tests/test_isinstance.py rename to tests/test_future/test_isinstance.py index c1b49e79..33c0a585 100644 --- a/future/tests/test_isinstance.py +++ b/tests/test_future/test_isinstance.py @@ -9,9 +9,8 @@ # tests use new style classes and properties, they actually do whitebox # testing of error conditions uncovered when using extension types. -import unittest -from test import support import sys +from future.tests.base import unittest class TestIsInstanceExceptions(unittest.TestCase): @@ -284,13 +283,5 @@ def blowstack(fxn, arg, compare_to): fxn(arg, tuple_arg) -def test_main(): - support.run_unittest( - TestIsInstanceExceptions, - TestIsSubclassExceptions, - TestIsInstanceIsSubclass - ) - - if __name__ == '__main__': - test_main() + unittest.main() diff --git a/future/tests/test_libfuturize_fixers.py b/tests/test_future/test_libfuturize_fixers.py similarity index 92% rename from future/tests/test_libfuturize_fixers.py rename to tests/test_future/test_libfuturize_fixers.py index 6fed6d42..2146d1f2 100644 --- a/future/tests/test_libfuturize_fixers.py +++ b/tests/test_future/test_libfuturize_fixers.py @@ -9,7 +9,6 @@ import sys import os import os.path -import re from itertools import chain from textwrap import dedent from operator import itemgetter @@ -18,6 +17,7 @@ # Local imports from future.tests.base import unittest +from 
future.builtins import str test_dir = os.path.dirname(__file__) @@ -25,7 +25,7 @@ # grammar_path = os.path.join(test_dir, "..", "Grammar.txt") # grammar = driver.load_grammar(grammar_path) # driver = driver.Driver(grammar, convert=pytree.convert) -# +# # def parse_string(string): # return driver.parse_string(reformat(string), debug=True) @@ -78,7 +78,7 @@ def _check(self, before, after): before = reformat(before) after = reformat(after) tree = self.refactor.refactor_string(before, self.filename) - self.assertEqual(after, unicode(tree)) + self.assertEqual(after, str(tree)) return tree def check(self, before, after, ignore_warnings=False): @@ -118,319 +118,350 @@ def assert_runs_after(self, *names): ############### EDIT the tests below ... -# +# # class Test_ne(FixerTestCase): # fixer = "ne" -# +# # def test_basic(self): # b = """if x <> y: # pass""" -# +# # a = """if x != y: # pass""" # self.check(b, a) -# -# +# +# # class Test_print(FixerTestCase): # fixer = "print_" -# +# # def test_print(self): # b = """print 'Hello world'""" # a = """from __future__ import print_function\nprint('Hello world')""" # self.check(b, a) -# -# +# +# # class Test_apply(FixerTestCase): # fixer = "apply" -# +# # def test_1(self): # b = """x = apply(f, g + h)""" # a = """x = f(*g + h)""" # self.check(b, a) -# -# +# +# # class Test_intern(FixerTestCase): # fixer = "intern" -# +# # def test_prefix_preservation(self): # b = """x = intern( a )""" # a = """import sys\nx = sys.intern( a )""" # self.check(b, a) -# +# # b = """y = intern("b" # test # )""" # a = """import sys\ny = sys.intern("b" # test # )""" # self.check(b, a) -# +# # b = """z = intern(a+b+c.d, )""" # a = """import sys\nz = sys.intern(a+b+c.d, )""" # self.check(b, a) -# +# # def test(self): # b = """x = intern(a)""" # a = """import sys\nx = sys.intern(a)""" # self.check(b, a) -# +# # b = """z = intern(a+b+c.d,)""" # a = """import sys\nz = sys.intern(a+b+c.d,)""" # self.check(b, a) -# +# # b = """intern("y%s" % 5).replace("y", 
"")""" # a = """import sys\nsys.intern("y%s" % 5).replace("y", "")""" # self.check(b, a) -# +# # # These should not be refactored -# +# # def test_unchanged(self): # s = """intern(a=1)""" # self.unchanged(s) -# +# # s = """intern(f, g)""" # self.unchanged(s) -# +# # s = """intern(*h)""" # self.unchanged(s) -# +# # s = """intern(**i)""" # self.unchanged(s) -# +# # s = """intern()""" # self.unchanged(s) -# +# # class Test_reduce(FixerTestCase): # fixer = "reduce" -# +# # def test_simple_call(self): # b = "reduce(a, b, c)" # a = "from functools import reduce\nreduce(a, b, c)" # self.check(b, a) -# +# # def test_bug_7253(self): # # fix_tuple_params was being bad and orphaning nodes in the tree. # b = "def x(arg): reduce(sum, [])" # a = "from functools import reduce\ndef x(arg): reduce(sum, [])" # self.check(b, a) -# +# # def test_call_with_lambda(self): # b = "reduce(lambda x, y: x + y, seq)" # a = "from functools import reduce\nreduce(lambda x, y: x + y, seq)" # self.check(b, a) -# +# # def test_unchanged(self): # s = "reduce(a)" # self.unchanged(s) -# +# # s = "reduce(a, b=42)" # self.unchanged(s) -# +# # s = "reduce(a, b, c, d)" # self.unchanged(s) -# +# # s = "reduce(**c)" # self.unchanged(s) -# +# # s = "reduce()" # self.unchanged(s) -# -# class Test_print(FixerTestCase): -# fixer = "print" -# -# def test_prefix_preservation(self): -# b = """print 1, 1+1, 1+1+1""" -# a = """print(1, 1+1, 1+1+1)""" -# self.check(b, a) -# -# def test_idempotency(self): -# s = """print()""" -# self.unchanged(s) -# -# s = """print('')""" -# self.unchanged(s) -# -# def test_idempotency_print_as_function(self): -# self.refactor.driver.grammar = pygram.python_grammar_no_print_statement -# s = """print(1, 1+1, 1+1+1)""" -# self.unchanged(s) -# -# s = """print()""" -# self.unchanged(s) -# -# s = """print('')""" -# self.unchanged(s) -# -# def test_1(self): -# b = """print 1, 1+1, 1+1+1""" -# a = """print(1, 1+1, 1+1+1)""" -# self.check(b, a) -# -# def test_2(self): -# b = """print 1, 2""" 
-# a = """print(1, 2)""" -# self.check(b, a) -# -# def test_3(self): -# b = """print""" -# a = """print()""" -# self.check(b, a) -# -# def test_4(self): -# # from bug 3000 -# b = """print whatever; print""" -# a = """print(whatever); print()""" -# self.check(b, a) -# -# def test_5(self): -# b = """print; print whatever;""" -# a = """print(); print(whatever);""" -# self.check(b, a) -# -# def test_tuple(self): -# b = """print (a, b, c)""" -# a = """print((a, b, c))""" -# self.check(b, a) -# -# # trailing commas -# -# def test_trailing_comma_1(self): -# b = """print 1, 2, 3,""" -# a = """print(1, 2, 3, end=' ')""" -# self.check(b, a) -# -# def test_trailing_comma_2(self): -# b = """print 1, 2,""" -# a = """print(1, 2, end=' ')""" -# self.check(b, a) -# -# def test_trailing_comma_3(self): -# b = """print 1,""" -# a = """print(1, end=' ')""" -# self.check(b, a) -# -# # >> stuff -# -# def test_vargs_without_trailing_comma(self): -# b = """print >>sys.stderr, 1, 2, 3""" -# a = """print(1, 2, 3, file=sys.stderr)""" -# self.check(b, a) -# -# def test_with_trailing_comma(self): -# b = """print >>sys.stderr, 1, 2,""" -# a = """print(1, 2, end=' ', file=sys.stderr)""" -# self.check(b, a) -# -# def test_no_trailing_comma(self): -# b = """print >>sys.stderr, 1+1""" -# a = """print(1+1, file=sys.stderr)""" -# self.check(b, a) -# -# def test_spaces_before_file(self): -# b = """print >> sys.stderr""" -# a = """print(file=sys.stderr)""" -# self.check(b, a) -# -# def test_with_future_print_function(self): -# s = "from __future__ import print_function\n" \ -# "print('Hai!', end=' ')" -# self.unchanged(s) -# -# b = "print 'Hello, world!'" -# a = "print('Hello, world!')" -# self.check(b, a) -# -# + +class Test_print(FixerTestCase): + fixer = "print" + + def test_prefix_preservation(self): + b = """print 1, 1+1, 1+1+1""" + a = """print(1, 1+1, 1+1+1)""" + self.check(b, a) + + def test_idempotency(self): + s = """print()""" + self.unchanged(s) + + s = """print('')""" + self.unchanged(s) + 
+ def test_idempotency_print_as_function(self): + self.refactor.driver.grammar = pygram.python_grammar_no_print_statement + s = """print(1, 1+1, 1+1+1)""" + self.unchanged(s) + + s = """print()""" + self.unchanged(s) + + s = """print('')""" + self.unchanged(s) + + def test_1(self): + b = """print 1, 1+1, 1+1+1""" + a = """print(1, 1+1, 1+1+1)""" + self.check(b, a) + + def test_2(self): + b = """print 1, 2""" + a = """print(1, 2)""" + self.check(b, a) + + def test_3(self): + b = """print""" + a = """print()""" + self.check(b, a) + + def test_4(self): + # from bug 3000 + b = """print whatever; print""" + a = """print(whatever); print()""" + self.check(b, a) + + def test_5(self): + b = """print; print whatever;""" + a = """print(); print(whatever);""" + self.check(b, a) + + def test_tuple(self): + b = """print (a, b, c)""" + a = """print((a, b, c))""" + self.check(b, a) + + # trailing commas + + def test_trailing_comma_1(self): + b = """print 1, 2, 3,""" + a = """print(1, 2, 3, end=' ')""" + self.check(b, a) + + def test_trailing_comma_2(self): + b = """print 1, 2,""" + a = """print(1, 2, end=' ')""" + self.check(b, a) + + def test_trailing_comma_3(self): + b = """print 1,""" + a = """print(1, end=' ')""" + self.check(b, a) + + def test_trailing_comma_4(self): + b = """print "a ",""" + a = """print("a ", end=' ')""" + self.check(b, a) + + def test_trailing_comma_5(self): + b = r"""print "b\t",""" + a = r"""print("b\t", end='')""" + self.check(b, a) + + def test_trailing_comma_6(self): + b = r"""print "c\n",""" + a = r"""print("c\n", end='')""" + self.check(b, a) + + def test_trailing_comma_7(self): + b = r"""print "d\r",""" + a = r"""print("d\r", end='')""" + self.check(b, a) + + def test_trailing_comma_8(self): + b = r"""print "%s\n" % (1,),""" + a = r"""print("%s\n" % (1,), end='')""" + self.check(b, a) + + + def test_trailing_comma_9(self): + b = r"""print r"e\n",""" + a = r"""print(r"e\n", end=' ')""" + self.check(b, a) + + # >> stuff + + def 
test_vargs_without_trailing_comma(self): + b = """print >>sys.stderr, 1, 2, 3""" + a = """print(1, 2, 3, file=sys.stderr)""" + self.check(b, a) + + def test_with_trailing_comma(self): + b = """print >>sys.stderr, 1, 2,""" + a = """print(1, 2, end=' ', file=sys.stderr)""" + self.check(b, a) + + def test_no_trailing_comma(self): + b = """print >>sys.stderr, 1+1""" + a = """print(1+1, file=sys.stderr)""" + self.check(b, a) + + def test_spaces_before_file(self): + b = """print >> sys.stderr""" + a = """print(file=sys.stderr)""" + self.check(b, a) + + def test_with_future_print_function(self): + s = "from __future__ import print_function\n" \ + "print('Hai!', end=' ')" + self.unchanged(s) + + b = "print 'Hello, world!'" + a = "print('Hello, world!')" + self.check(b, a) + + # class Test_exec(FixerTestCase): # fixer = "exec" -# +# # def test_prefix_preservation(self): # b = """ exec code in ns1, ns2""" # a = """ exec(code, ns1, ns2)""" # self.check(b, a) -# +# # def test_basic(self): # b = """exec code""" # a = """exec(code)""" # self.check(b, a) -# +# # def test_with_globals(self): # b = """exec code in ns""" # a = """exec(code, ns)""" # self.check(b, a) -# +# # def test_with_globals_locals(self): # b = """exec code in ns1, ns2""" # a = """exec(code, ns1, ns2)""" # self.check(b, a) -# +# # def test_complex_1(self): # b = """exec (a.b()) in ns""" # a = """exec((a.b()), ns)""" # self.check(b, a) -# +# # def test_complex_2(self): # b = """exec a.b() + c in ns""" # a = """exec(a.b() + c, ns)""" # self.check(b, a) -# +# # # These should not be touched -# +# # def test_unchanged_1(self): # s = """exec(code)""" # self.unchanged(s) -# +# # def test_unchanged_2(self): # s = """exec (code)""" # self.unchanged(s) -# +# # def test_unchanged_3(self): # s = """exec(code, ns)""" # self.unchanged(s) -# +# # def test_unchanged_4(self): # s = """exec(code, ns1, ns2)""" # self.unchanged(s) -# +# # class Test_repr(FixerTestCase): # fixer = "repr" -# +# # def test_prefix_preservation(self): 
# b = """x = `1 + 2`""" # a = """x = repr(1 + 2)""" # self.check(b, a) -# +# # def test_simple_1(self): # b = """x = `1 + 2`""" # a = """x = repr(1 + 2)""" # self.check(b, a) -# +# # def test_simple_2(self): # b = """y = `x`""" # a = """y = repr(x)""" # self.check(b, a) -# +# # def test_complex(self): # b = """z = `y`.__repr__()""" # a = """z = repr(y).__repr__()""" # self.check(b, a) -# +# # def test_tuple(self): # b = """x = `1, 2, 3`""" # a = """x = repr((1, 2, 3))""" # self.check(b, a) -# +# # def test_nested(self): # b = """x = `1 + `2``""" # a = """x = repr(1 + repr(2))""" # self.check(b, a) -# +# # def test_nested_tuples(self): # b = """x = `1, 2 + `3, 4``""" # a = """x = repr((1, 2 + repr((3, 4))))""" # self.check(b, a) -# +# # class Test_except(FixerTestCase): # fixer = "except" -# +# # def test_prefix_preservation(self): # b = """ # try: @@ -443,7 +474,7 @@ def assert_runs_after(self, *names): # except (RuntimeError, ImportError) as e: # pass""" # self.check(b, a) -# +# # def test_simple(self): # b = """ # try: @@ -456,7 +487,7 @@ def assert_runs_after(self, *names): # except Foo as e: # pass""" # self.check(b, a) -# +# # def test_simple_no_space_before_target(self): # b = """ # try: @@ -469,7 +500,7 @@ def assert_runs_after(self, *names): # except Foo as e: # pass""" # self.check(b, a) -# +# # def test_tuple_unpack(self): # b = """ # def foo(): @@ -479,7 +510,7 @@ def assert_runs_after(self, *names): # pass # except ImportError, e: # pass""" -# +# # a = """ # def foo(): # try: @@ -490,28 +521,28 @@ def assert_runs_after(self, *names): # except ImportError as e: # pass""" # self.check(b, a) -# +# # def test_multi_class(self): # b = """ # try: # pass # except (RuntimeError, ImportError), e: # pass""" -# +# # a = """ # try: # pass # except (RuntimeError, ImportError) as e: # pass""" # self.check(b, a) -# +# # def test_list_unpack(self): # b = """ # try: # pass # except Exception, [a, b]: # pass""" -# +# # a = """ # try: # pass @@ -519,14 +550,14 @@ def 
assert_runs_after(self, *names): # [a, b] = xxx_todo_changeme.args # pass""" # self.check(b, a) -# +# # def test_weird_target_1(self): # b = """ # try: # pass # except Exception, d[5]: # pass""" -# +# # a = """ # try: # pass @@ -534,14 +565,14 @@ def assert_runs_after(self, *names): # d[5] = xxx_todo_changeme # pass""" # self.check(b, a) -# +# # def test_weird_target_2(self): # b = """ # try: # pass # except Exception, a.foo: # pass""" -# +# # a = """ # try: # pass @@ -549,14 +580,14 @@ def assert_runs_after(self, *names): # a.foo = xxx_todo_changeme # pass""" # self.check(b, a) -# +# # def test_weird_target_3(self): # b = """ # try: # pass # except Exception, a().foo: # pass""" -# +# # a = """ # try: # pass @@ -564,7 +595,7 @@ def assert_runs_after(self, *names): # a().foo = xxx_todo_changeme # pass""" # self.check(b, a) -# +# # def test_bare_except(self): # b = """ # try: @@ -573,7 +604,7 @@ def assert_runs_after(self, *names): # pass # except: # pass""" -# +# # a = """ # try: # pass @@ -582,7 +613,7 @@ def assert_runs_after(self, *names): # except: # pass""" # self.check(b, a) -# +# # def test_bare_except_and_else_finally(self): # b = """ # try: @@ -595,7 +626,7 @@ def assert_runs_after(self, *names): # pass # finally: # pass""" -# +# # a = """ # try: # pass @@ -608,7 +639,7 @@ def assert_runs_after(self, *names): # finally: # pass""" # self.check(b, a) -# +# # def test_multi_fixed_excepts_before_bare_except(self): # b = """ # try: @@ -619,7 +650,7 @@ def assert_runs_after(self, *names): # pass # except: # pass""" -# +# # a = """ # try: # pass @@ -630,7 +661,7 @@ def assert_runs_after(self, *names): # except: # pass""" # self.check(b, a) -# +# # def test_one_line_suites(self): # b = """ # try: raise TypeError @@ -676,9 +707,9 @@ def assert_runs_after(self, *names): # finally: done() # """ # self.check(b, a) -# +# # # These should not be touched: -# +# # def test_unchanged_1(self): # s = """ # try: @@ -686,7 +717,7 @@ def assert_runs_after(self, *names): # 
except: # pass""" # self.unchanged(s) -# +# # def test_unchanged_2(self): # s = """ # try: @@ -694,7 +725,7 @@ def assert_runs_after(self, *names): # except Exception: # pass""" # self.unchanged(s) -# +# # def test_unchanged_3(self): # s = """ # try: @@ -702,194 +733,215 @@ def assert_runs_after(self, *names): # except (Exception, SystemExit): # pass""" # self.unchanged(s) -# -# class Test_raise(FixerTestCase): -# fixer = "raise" -# -# def test_basic(self): -# b = """raise Exception, 5""" -# a = """raise Exception(5)""" -# self.check(b, a) -# -# def test_prefix_preservation(self): -# b = """raise Exception,5""" -# a = """raise Exception(5)""" -# self.check(b, a) -# -# b = """raise Exception, 5""" -# a = """raise Exception(5)""" -# self.check(b, a) -# -# def test_with_comments(self): -# b = """raise Exception, 5 # foo""" -# a = """raise Exception(5) # foo""" -# self.check(b, a) -# -# b = """raise E, (5, 6) % (a, b) # foo""" -# a = """raise E((5, 6) % (a, b)) # foo""" -# self.check(b, a) -# -# b = """def foo(): -# raise Exception, 5, 6 # foo""" -# a = """def foo(): -# raise Exception(5).with_traceback(6) # foo""" -# self.check(b, a) -# -# def test_None_value(self): -# b = """raise Exception(5), None, tb""" -# a = """raise Exception(5).with_traceback(tb)""" -# self.check(b, a) -# -# def test_tuple_value(self): -# b = """raise Exception, (5, 6, 7)""" -# a = """raise Exception(5, 6, 7)""" -# self.check(b, a) -# -# def test_tuple_detection(self): -# b = """raise E, (5, 6) % (a, b)""" -# a = """raise E((5, 6) % (a, b))""" -# self.check(b, a) -# -# def test_tuple_exc_1(self): -# b = """raise (((E1, E2), E3), E4), V""" -# a = """raise E1(V)""" -# self.check(b, a) -# -# def test_tuple_exc_2(self): -# b = """raise (E1, (E2, E3), E4), V""" -# a = """raise E1(V)""" -# self.check(b, a) -# -# # These should produce a warning -# -# def test_string_exc(self): -# s = """raise 'foo'""" -# self.warns_unchanged(s, "Python 3 does not support string exceptions") -# -# def 
test_string_exc_val(self): -# s = """raise "foo", 5""" -# self.warns_unchanged(s, "Python 3 does not support string exceptions") -# -# def test_string_exc_val_tb(self): -# s = """raise "foo", 5, 6""" -# self.warns_unchanged(s, "Python 3 does not support string exceptions") -# -# # These should result in traceback-assignment -# -# def test_tb_1(self): -# b = """def foo(): -# raise Exception, 5, 6""" -# a = """def foo(): -# raise Exception(5).with_traceback(6)""" -# self.check(b, a) -# -# def test_tb_2(self): -# b = """def foo(): -# a = 5 -# raise Exception, 5, 6 -# b = 6""" -# a = """def foo(): -# a = 5 -# raise Exception(5).with_traceback(6) -# b = 6""" -# self.check(b, a) -# -# def test_tb_3(self): -# b = """def foo(): -# raise Exception,5,6""" -# a = """def foo(): -# raise Exception(5).with_traceback(6)""" -# self.check(b, a) -# -# def test_tb_4(self): -# b = """def foo(): -# a = 5 -# raise Exception,5,6 -# b = 6""" -# a = """def foo(): -# a = 5 -# raise Exception(5).with_traceback(6) -# b = 6""" -# self.check(b, a) -# -# def test_tb_5(self): -# b = """def foo(): -# raise Exception, (5, 6, 7), 6""" -# a = """def foo(): -# raise Exception(5, 6, 7).with_traceback(6)""" -# self.check(b, a) -# -# def test_tb_6(self): -# b = """def foo(): -# a = 5 -# raise Exception, (5, 6, 7), 6 -# b = 6""" -# a = """def foo(): -# a = 5 -# raise Exception(5, 6, 7).with_traceback(6) -# b = 6""" -# self.check(b, a) -# + +class Test_raise(FixerTestCase): + fixer = "raise" + + def test_basic(self): + b = """raise Exception, 5""" + a = """raise Exception(5)""" + self.check(b, a) + + def test_prefix_preservation(self): + b = """raise Exception,5""" + a = """raise Exception(5)""" + self.check(b, a) + + b = """raise Exception, 5""" + a = """raise Exception(5)""" + self.check(b, a) + + def test_with_comments(self): + b = """raise Exception, 5 # foo""" + a = """raise Exception(5) # foo""" + self.check(b, a) + + b = """def foo(): + raise Exception, 5, 6 # foo""" + a = """def foo(): + raise 
Exception(5).with_traceback(6) # foo""" + self.check(b, a) + + def test_None_value(self): + b = """raise Exception(5), None, tb""" + a = """raise Exception(5).with_traceback(tb)""" + self.check(b, a) + + def test_tuple_value(self): + b = """raise Exception, (5, 6, 7)""" + a = """raise Exception(5, 6, 7)""" + self.check(b, a) + + def test_tuple_exc_1(self): + b = """raise (((E1, E2), E3), E4), 5""" + a = """raise E1(5)""" + self.check(b, a) + + def test_tuple_exc_2(self): + b = """raise (E1, (E2, E3), E4), 5""" + a = """raise E1(5)""" + self.check(b, a) + + def test_unknown_value(self): + b = """ + raise E, V""" + a = """ + from future.utils import raise_ + raise_(E, V)""" + self.check(b, a) + + def test_unknown_value_with_traceback_with_comments(self): + b = """ + raise E, Func(arg1, arg2, arg3), tb # foo""" + a = """ + from future.utils import raise_ + raise_(E, Func(arg1, arg2, arg3), tb) # foo""" + self.check(b, a) + + def test_unknown_value_with_indent(self): + b = """ + while True: + print() # another expression in the same block triggers different parsing + raise E, V + """ + a = """ + from future.utils import raise_ + while True: + print() # another expression in the same block triggers different parsing + raise_(E, V) + """ + self.check(b, a) + + # These should produce a warning + + def test_string_exc(self): + s = """raise 'foo'""" + self.warns_unchanged(s, "Python 3 does not support string exceptions") + + def test_string_exc_val(self): + s = """raise "foo", 5""" + self.warns_unchanged(s, "Python 3 does not support string exceptions") + + def test_string_exc_val_tb(self): + s = """raise "foo", 5, 6""" + self.warns_unchanged(s, "Python 3 does not support string exceptions") + + # These should result in traceback-assignment + + def test_tb_1(self): + b = """def foo(): + raise Exception, 5, 6""" + a = """def foo(): + raise Exception(5).with_traceback(6)""" + self.check(b, a) + + def test_tb_2(self): + b = """def foo(): + a = 5 + raise Exception, 5, 6 + b = 
6""" + a = """def foo(): + a = 5 + raise Exception(5).with_traceback(6) + b = 6""" + self.check(b, a) + + def test_tb_3(self): + b = """def foo(): + raise Exception,5,6""" + a = """def foo(): + raise Exception(5).with_traceback(6)""" + self.check(b, a) + + def test_tb_4(self): + b = """def foo(): + a = 5 + raise Exception,5,6 + b = 6""" + a = """def foo(): + a = 5 + raise Exception(5).with_traceback(6) + b = 6""" + self.check(b, a) + + def test_tb_5(self): + b = """def foo(): + raise Exception, (5, 6, 7), 6""" + a = """def foo(): + raise Exception(5, 6, 7).with_traceback(6)""" + self.check(b, a) + + def test_tb_6(self): + b = """def foo(): + a = 5 + raise Exception, (5, 6, 7), 6 + b = 6""" + a = """def foo(): + a = 5 + raise Exception(5, 6, 7).with_traceback(6) + b = 6""" + self.check(b, a) +# # class Test_throw(FixerTestCase): # fixer = "throw" -# +# # def test_1(self): # b = """g.throw(Exception, 5)""" # a = """g.throw(Exception(5))""" # self.check(b, a) -# +# # def test_2(self): # b = """g.throw(Exception,5)""" # a = """g.throw(Exception(5))""" # self.check(b, a) -# +# # def test_3(self): # b = """g.throw(Exception, (5, 6, 7))""" # a = """g.throw(Exception(5, 6, 7))""" # self.check(b, a) -# +# # def test_4(self): # b = """5 + g.throw(Exception, 5)""" # a = """5 + g.throw(Exception(5))""" # self.check(b, a) -# +# # # These should produce warnings -# +# # def test_warn_1(self): # s = """g.throw("foo")""" # self.warns_unchanged(s, "Python 3 does not support string exceptions") -# +# # def test_warn_2(self): # s = """g.throw("foo", 5)""" # self.warns_unchanged(s, "Python 3 does not support string exceptions") -# +# # def test_warn_3(self): # s = """g.throw("foo", 5, 6)""" # self.warns_unchanged(s, "Python 3 does not support string exceptions") -# +# # # These should not be touched -# +# # def test_untouched_1(self): # s = """g.throw(Exception)""" # self.unchanged(s) -# +# # def test_untouched_2(self): # s = """g.throw(Exception(5, 6))""" # self.unchanged(s) -# +# # 
def test_untouched_3(self): # s = """5 + g.throw(Exception(5, 6))""" # self.unchanged(s) -# +# # # These should result in traceback-assignment -# +# # def test_tb_1(self): # b = """def foo(): # g.throw(Exception, 5, 6)""" # a = """def foo(): # g.throw(Exception(5).with_traceback(6))""" # self.check(b, a) -# +# # def test_tb_2(self): # b = """def foo(): # a = 5 @@ -900,14 +952,14 @@ def assert_runs_after(self, *names): # g.throw(Exception(5).with_traceback(6)) # b = 6""" # self.check(b, a) -# +# # def test_tb_3(self): # b = """def foo(): # g.throw(Exception,5,6)""" # a = """def foo(): # g.throw(Exception(5).with_traceback(6))""" # self.check(b, a) -# +# # def test_tb_4(self): # b = """def foo(): # a = 5 @@ -918,14 +970,14 @@ def assert_runs_after(self, *names): # g.throw(Exception(5).with_traceback(6)) # b = 6""" # self.check(b, a) -# +# # def test_tb_5(self): # b = """def foo(): # g.throw(Exception, (5, 6, 7), 6)""" # a = """def foo(): # g.throw(Exception(5, 6, 7).with_traceback(6))""" # self.check(b, a) -# +# # def test_tb_6(self): # b = """def foo(): # a = 5 @@ -936,14 +988,14 @@ def assert_runs_after(self, *names): # g.throw(Exception(5, 6, 7).with_traceback(6)) # b = 6""" # self.check(b, a) -# +# # def test_tb_7(self): # b = """def foo(): # a + g.throw(Exception, 5, 6)""" # a = """def foo(): # a + g.throw(Exception(5).with_traceback(6))""" # self.check(b, a) -# +# # def test_tb_8(self): # b = """def foo(): # a = 5 @@ -954,596 +1006,596 @@ def assert_runs_after(self, *names): # a + g.throw(Exception(5).with_traceback(6)) # b = 6""" # self.check(b, a) -# +# # class Test_long(FixerTestCase): # fixer = "long" -# +# # def test_1(self): # b = """x = long(x)""" # a = """x = int(x)""" # self.check(b, a) -# +# # def test_2(self): # b = """y = isinstance(x, long)""" # a = """y = isinstance(x, int)""" # self.check(b, a) -# +# # def test_3(self): # b = """z = type(x) in (int, long)""" # a = """z = type(x) in (int, int)""" # self.check(b, a) -# +# # def 
test_unchanged(self): # s = """long = True""" # self.unchanged(s) -# +# # s = """s.long = True""" # self.unchanged(s) -# +# # s = """def long(): pass""" # self.unchanged(s) -# +# # s = """class long(): pass""" # self.unchanged(s) -# +# # s = """def f(long): pass""" # self.unchanged(s) -# +# # s = """def f(g, long): pass""" # self.unchanged(s) -# +# # s = """def f(x, long=True): pass""" # self.unchanged(s) -# +# # def test_prefix_preservation(self): # b = """x = long( x )""" # a = """x = int( x )""" # self.check(b, a) -# -# +# +# # class Test_execfile(FixerTestCase): # fixer = "execfile" -# +# # def test_conversion(self): # b = """execfile("fn")""" # a = """exec(compile(open("fn").read(), "fn", 'exec'))""" # self.check(b, a) -# +# # b = """execfile("fn", glob)""" # a = """exec(compile(open("fn").read(), "fn", 'exec'), glob)""" # self.check(b, a) -# +# # b = """execfile("fn", glob, loc)""" # a = """exec(compile(open("fn").read(), "fn", 'exec'), glob, loc)""" # self.check(b, a) -# +# # b = """execfile("fn", globals=glob)""" # a = """exec(compile(open("fn").read(), "fn", 'exec'), globals=glob)""" # self.check(b, a) -# +# # b = """execfile("fn", locals=loc)""" # a = """exec(compile(open("fn").read(), "fn", 'exec'), locals=loc)""" # self.check(b, a) -# +# # b = """execfile("fn", globals=glob, locals=loc)""" # a = """exec(compile(open("fn").read(), "fn", 'exec'), globals=glob, locals=loc)""" # self.check(b, a) -# +# # def test_spacing(self): # b = """execfile( "fn" )""" # a = """exec(compile(open( "fn" ).read(), "fn", 'exec'))""" # self.check(b, a) -# +# # b = """execfile("fn", globals = glob)""" # a = """exec(compile(open("fn").read(), "fn", 'exec'), globals = glob)""" # self.check(b, a) -# -# +# +# # class Test_isinstance(FixerTestCase): # fixer = "isinstance" -# +# # def test_remove_multiple_items(self): # b = """isinstance(x, (int, int, int))""" # a = """isinstance(x, int)""" # self.check(b, a) -# +# # b = """isinstance(x, (int, float, int, int, float))""" # a = 
"""isinstance(x, (int, float))""" # self.check(b, a) -# +# # b = """isinstance(x, (int, float, int, int, float, str))""" # a = """isinstance(x, (int, float, str))""" # self.check(b, a) -# +# # b = """isinstance(foo() + bar(), (x(), y(), x(), int, int))""" # a = """isinstance(foo() + bar(), (x(), y(), x(), int))""" # self.check(b, a) -# +# # def test_prefix_preservation(self): # b = """if isinstance( foo(), ( bar, bar, baz )) : pass""" # a = """if isinstance( foo(), ( bar, baz )) : pass""" # self.check(b, a) -# +# # def test_unchanged(self): # self.unchanged("isinstance(x, (str, int))") -# +# # class Test_dict(FixerTestCase): # fixer = "dict" -# +# # def test_prefix_preservation(self): # b = "if d. keys ( ) : pass" # a = "if list(d. keys ( )) : pass" # self.check(b, a) -# +# # b = "if d. items ( ) : pass" # a = "if list(d. items ( )) : pass" # self.check(b, a) -# +# # b = "if d. iterkeys ( ) : pass" # a = "if iter(d. keys ( )) : pass" # self.check(b, a) -# +# # b = "[i for i in d. iterkeys( ) ]" # a = "[i for i in d. keys( ) ]" # self.check(b, a) -# +# # b = "if d. viewkeys ( ) : pass" # a = "if d. keys ( ) : pass" # self.check(b, a) -# +# # b = "[i for i in d. viewkeys( ) ]" # a = "[i for i in d. 
keys( ) ]" # self.check(b, a) -# +# # def test_trailing_comment(self): # b = "d.keys() # foo" # a = "list(d.keys()) # foo" # self.check(b, a) -# +# # b = "d.items() # foo" # a = "list(d.items()) # foo" # self.check(b, a) -# +# # b = "d.iterkeys() # foo" # a = "iter(d.keys()) # foo" # self.check(b, a) -# +# # b = """[i for i in d.iterkeys() # foo # ]""" # a = """[i for i in d.keys() # foo # ]""" # self.check(b, a) -# +# # b = """[i for i in d.iterkeys() # foo # ]""" # a = """[i for i in d.keys() # foo # ]""" # self.check(b, a) -# +# # b = "d.viewitems() # foo" # a = "d.items() # foo" # self.check(b, a) -# +# # def test_unchanged(self): # for wrapper in fixer_util.consuming_calls: # s = "s = %s(d.keys())" % wrapper # self.unchanged(s) -# +# # s = "s = %s(d.values())" % wrapper # self.unchanged(s) -# +# # s = "s = %s(d.items())" % wrapper # self.unchanged(s) -# +# # def test_01(self): # b = "d.keys()" # a = "list(d.keys())" # self.check(b, a) -# +# # b = "a[0].foo().keys()" # a = "list(a[0].foo().keys())" # self.check(b, a) -# +# # def test_02(self): # b = "d.items()" # a = "list(d.items())" # self.check(b, a) -# +# # def test_03(self): # b = "d.values()" # a = "list(d.values())" # self.check(b, a) -# +# # def test_04(self): # b = "d.iterkeys()" # a = "iter(d.keys())" # self.check(b, a) -# +# # def test_05(self): # b = "d.iteritems()" # a = "iter(d.items())" # self.check(b, a) -# +# # def test_06(self): # b = "d.itervalues()" # a = "iter(d.values())" # self.check(b, a) -# +# # def test_07(self): # s = "list(d.keys())" # self.unchanged(s) -# +# # def test_08(self): # s = "sorted(d.keys())" # self.unchanged(s) -# +# # def test_09(self): # b = "iter(d.keys())" # a = "iter(list(d.keys()))" # self.check(b, a) -# +# # def test_10(self): # b = "foo(d.keys())" # a = "foo(list(d.keys()))" # self.check(b, a) -# +# # def test_11(self): # b = "for i in d.keys(): print i" # a = "for i in list(d.keys()): print i" # self.check(b, a) -# +# # def test_12(self): # b = "for i in 
d.iterkeys(): print i" # a = "for i in d.keys(): print i" # self.check(b, a) -# +# # def test_13(self): # b = "[i for i in d.keys()]" # a = "[i for i in list(d.keys())]" # self.check(b, a) -# +# # def test_14(self): # b = "[i for i in d.iterkeys()]" # a = "[i for i in d.keys()]" # self.check(b, a) -# +# # def test_15(self): # b = "(i for i in d.keys())" # a = "(i for i in list(d.keys()))" # self.check(b, a) -# +# # def test_16(self): # b = "(i for i in d.iterkeys())" # a = "(i for i in d.keys())" # self.check(b, a) -# +# # def test_17(self): # b = "iter(d.iterkeys())" # a = "iter(d.keys())" # self.check(b, a) -# +# # def test_18(self): # b = "list(d.iterkeys())" # a = "list(d.keys())" # self.check(b, a) -# +# # def test_19(self): # b = "sorted(d.iterkeys())" # a = "sorted(d.keys())" # self.check(b, a) -# +# # def test_20(self): # b = "foo(d.iterkeys())" # a = "foo(iter(d.keys()))" # self.check(b, a) -# +# # def test_21(self): # b = "print h.iterkeys().next()" # a = "print iter(h.keys()).next()" # self.check(b, a) -# +# # def test_22(self): # b = "print h.keys()[0]" # a = "print list(h.keys())[0]" # self.check(b, a) -# +# # def test_23(self): # b = "print list(h.iterkeys().next())" # a = "print list(iter(h.keys()).next())" # self.check(b, a) -# +# # def test_24(self): # b = "for x in h.keys()[0]: print x" # a = "for x in list(h.keys())[0]: print x" # self.check(b, a) -# +# # def test_25(self): # b = "d.viewkeys()" # a = "d.keys()" # self.check(b, a) -# +# # def test_26(self): # b = "d.viewitems()" # a = "d.items()" # self.check(b, a) -# +# # def test_27(self): # b = "d.viewvalues()" # a = "d.values()" # self.check(b, a) -# +# # def test_14(self): # b = "[i for i in d.viewkeys()]" # a = "[i for i in d.keys()]" # self.check(b, a) -# +# # def test_15(self): # b = "(i for i in d.viewkeys())" # a = "(i for i in d.keys())" # self.check(b, a) -# +# # def test_17(self): # b = "iter(d.viewkeys())" # a = "iter(d.keys())" # self.check(b, a) -# +# # def test_18(self): # b = 
"list(d.viewkeys())" # a = "list(d.keys())" # self.check(b, a) -# +# # def test_19(self): # b = "sorted(d.viewkeys())" # a = "sorted(d.keys())" # self.check(b, a) -# +# # class Test_xrange(FixerTestCase): # fixer = "xrange" -# +# # def test_prefix_preservation(self): # b = """x = xrange( 10 )""" # a = """x = range( 10 )""" # self.check(b, a) -# +# # b = """x = xrange( 1 , 10 )""" # a = """x = range( 1 , 10 )""" # self.check(b, a) -# +# # b = """x = xrange( 0 , 10 , 2 )""" # a = """x = range( 0 , 10 , 2 )""" # self.check(b, a) -# +# # def test_single_arg(self): # b = """x = xrange(10)""" # a = """x = range(10)""" # self.check(b, a) -# +# # def test_two_args(self): # b = """x = xrange(1, 10)""" # a = """x = range(1, 10)""" # self.check(b, a) -# +# # def test_three_args(self): # b = """x = xrange(0, 10, 2)""" # a = """x = range(0, 10, 2)""" # self.check(b, a) -# +# # def test_wrap_in_list(self): # b = """x = range(10, 3, 9)""" # a = """x = list(range(10, 3, 9))""" # self.check(b, a) -# +# # b = """x = foo(range(10, 3, 9))""" # a = """x = foo(list(range(10, 3, 9)))""" # self.check(b, a) -# +# # b = """x = range(10, 3, 9) + [4]""" # a = """x = list(range(10, 3, 9)) + [4]""" # self.check(b, a) -# +# # b = """x = range(10)[::-1]""" # a = """x = list(range(10))[::-1]""" # self.check(b, a) -# +# # b = """x = range(10) [3]""" # a = """x = list(range(10)) [3]""" # self.check(b, a) -# +# # def test_xrange_in_for(self): # b = """for i in xrange(10):\n j=i""" # a = """for i in range(10):\n j=i""" # self.check(b, a) -# +# # b = """[i for i in xrange(10)]""" # a = """[i for i in range(10)]""" # self.check(b, a) -# +# # def test_range_in_for(self): # self.unchanged("for i in range(10): pass") # self.unchanged("[i for i in range(10)]") -# +# # def test_in_contains_test(self): # self.unchanged("x in range(10, 3, 9)") -# +# # def test_in_consuming_context(self): # for call in fixer_util.consuming_calls: # self.unchanged("a = %s(range(10))" % call) -# +# # class 
Test_xrange_with_reduce(FixerTestCase): -# +# # def setUp(self): # super(Test_xrange_with_reduce, self).setUp(["xrange", "reduce"]) -# +# # def test_double_transform(self): # b = """reduce(x, xrange(5))""" # a = """from functools import reduce # reduce(x, range(5))""" # self.check(b, a) -# +# # class Test_raw_input(FixerTestCase): # fixer = "raw_input" -# +# # def test_prefix_preservation(self): # b = """x = raw_input( )""" # a = """x = input( )""" # self.check(b, a) -# +# # b = """x = raw_input( '' )""" # a = """x = input( '' )""" # self.check(b, a) -# +# # def test_1(self): # b = """x = raw_input()""" # a = """x = input()""" # self.check(b, a) -# +# # def test_2(self): # b = """x = raw_input('')""" # a = """x = input('')""" # self.check(b, a) -# +# # def test_3(self): # b = """x = raw_input('prompt')""" # a = """x = input('prompt')""" # self.check(b, a) -# +# # def test_4(self): # b = """x = raw_input(foo(a) + 6)""" # a = """x = input(foo(a) + 6)""" # self.check(b, a) -# +# # def test_5(self): # b = """x = raw_input(invite).split()""" # a = """x = input(invite).split()""" # self.check(b, a) -# +# # def test_6(self): # b = """x = raw_input(invite) . split ()""" # a = """x = input(invite) . 
split ()""" # self.check(b, a) -# +# # def test_8(self): # b = "x = int(raw_input())" # a = "x = int(input())" # self.check(b, a) -# +# # class Test_funcattrs(FixerTestCase): # fixer = "funcattrs" -# +# # attrs = ["closure", "doc", "name", "defaults", "code", "globals", "dict"] -# +# # def test(self): # for attr in self.attrs: # b = "a.func_%s" % attr # a = "a.__%s__" % attr # self.check(b, a) -# +# # b = "self.foo.func_%s.foo_bar" % attr # a = "self.foo.__%s__.foo_bar" % attr # self.check(b, a) -# +# # def test_unchanged(self): # for attr in self.attrs: # s = "foo(func_%s + 5)" % attr # self.unchanged(s) -# +# # s = "f(foo.__%s__)" % attr # self.unchanged(s) -# +# # s = "f(foo.__%s__.foo)" % attr # self.unchanged(s) -# +# # class Test_xreadlines(FixerTestCase): # fixer = "xreadlines" -# +# # def test_call(self): # b = "for x in f.xreadlines(): pass" # a = "for x in f: pass" # self.check(b, a) -# +# # b = "for x in foo().xreadlines(): pass" # a = "for x in foo(): pass" # self.check(b, a) -# +# # b = "for x in (5 + foo()).xreadlines(): pass" # a = "for x in (5 + foo()): pass" # self.check(b, a) -# +# # def test_attr_ref(self): # b = "foo(f.xreadlines + 5)" # a = "foo(f.__iter__ + 5)" # self.check(b, a) -# +# # b = "foo(f().xreadlines + 5)" # a = "foo(f().__iter__ + 5)" # self.check(b, a) -# +# # b = "foo((5 + f()).xreadlines + 5)" # a = "foo((5 + f()).__iter__ + 5)" # self.check(b, a) -# +# # def test_unchanged(self): # s = "for x in f.xreadlines(5): pass" # self.unchanged(s) -# +# # s = "for x in f.xreadlines(k=5): pass" # self.unchanged(s) -# +# # s = "for x in f.xreadlines(*k, **v): pass" # self.unchanged(s) -# +# # s = "foo(xreadlines)" # self.unchanged(s) -# -# +# +# # class ImportsFixerTests: -# +# # def test_import_module(self): # for old, new in self.modules.items(): # b = "import %s" % old # a = "import %s" % new # self.check(b, a) -# +# # b = "import foo, %s, bar" % old # a = "import foo, %s, bar" % new # self.check(b, a) -# +# # def 
test_import_from(self): # for old, new in self.modules.items(): # b = "from %s import foo" % old # a = "from %s import foo" % new # self.check(b, a) -# +# # b = "from %s import foo, bar" % old # a = "from %s import foo, bar" % new # self.check(b, a) -# +# # b = "from %s import (yes, no)" % old # a = "from %s import (yes, no)" % new # self.check(b, a) -# +# # def test_import_module_as(self): # for old, new in self.modules.items(): # b = "import %s as foo_bar" % old # a = "import %s as foo_bar" % new # self.check(b, a) -# +# # b = "import %s as foo_bar" % old # a = "import %s as foo_bar" % new # self.check(b, a) -# +# # def test_import_from_as(self): # for old, new in self.modules.items(): # b = "from %s import foo as bar" % old # a = "from %s import foo as bar" % new # self.check(b, a) -# +# # def test_star(self): # for old, new in self.modules.items(): # b = "from %s import *" % old # a = "from %s import *" % new # self.check(b, a) -# +# # def test_import_module_usage(self): # for old, new in self.modules.items(): # b = """ @@ -1555,7 +1607,7 @@ def assert_runs_after(self, *names): # foo(%s.bar) # """ % (new, new) # self.check(b, a) -# +# # b = """ # from %s import x # %s = 23 @@ -1565,13 +1617,13 @@ def assert_runs_after(self, *names): # %s = 23 # """ % (new, old) # self.check(b, a) -# +# # s = """ # def f(): # %s.method() # """ % (old,) # self.unchanged(s) -# +# # # test nested usage # b = """ # import %s @@ -1582,7 +1634,7 @@ def assert_runs_after(self, *names): # %s.bar(%s.foo) # """ % (new, new, new) # self.check(b, a) -# +# # b = """ # import %s # x.%s @@ -1592,16 +1644,16 @@ def assert_runs_after(self, *names): # x.%s # """ % (new, old) # self.check(b, a) -# -# +# +# # class Test_imports(FixerTestCase, ImportsFixerTests): # fixer = "imports" -# +# # def test_multiple_imports(self): # b = """import urlparse, cStringIO""" # a = """import urllib.parse, io""" # self.check(b, a) -# +# # def test_multiple_imports_as(self): # b = """ # import copy_reg as bar, 
HTMLParser as foo, urlparse @@ -1612,14 +1664,14 @@ def assert_runs_after(self, *names): # s = urllib.parse.spam(bar.foo()) # """ # self.check(b, a) -# -# +# +# # class Test_imports2(FixerTestCase, ImportsFixerTests): # fixer = "imports2" -# -# +# +# # class Test_imports_fixer_order(FixerTestCase, ImportsFixerTests): -# +# # def setUp(self): # super(Test_imports_fixer_order, self).setUp(['imports', 'imports2']) # from ..fixes.fix_imports2 import MAPPING as mapping2 @@ -1627,23 +1679,23 @@ def assert_runs_after(self, *names): # from ..fixes.fix_imports import MAPPING as mapping1 # for key in ('dbhash', 'dumbdbm', 'dbm', 'gdbm'): # self.modules[key] = mapping1[key] -# +# # def test_after_local_imports_refactoring(self): # for fix in ("imports", "imports2"): # self.fixer = fix # self.assert_runs_after("import") -# -# +# +# # class Test_urllib(FixerTestCase): # fixer = "urllib" # from ..fixes.fix_urllib import MAPPING as modules -# +# # def test_import_module(self): # for old, changes in self.modules.items(): # b = "import %s" % old # a = "import %s" % ", ".join(map(itemgetter(0), changes)) # self.check(b, a) -# +# # def test_import_from(self): # for old, changes in self.modules.items(): # all_members = [] @@ -1653,28 +1705,28 @@ def assert_runs_after(self, *names): # b = "from %s import %s" % (old, member) # a = "from %s import %s" % (new, member) # self.check(b, a) -# +# # s = "from foo import %s" % member # self.unchanged(s) -# +# # b = "from %s import %s" % (old, ", ".join(members)) # a = "from %s import %s" % (new, ", ".join(members)) # self.check(b, a) -# +# # s = "from foo import %s" % ", ".join(members) # self.unchanged(s) -# +# # # test the breaking of a module into multiple replacements # b = "from %s import %s" % (old, ", ".join(all_members)) # a = "\n".join(["from %s import %s" % (new, ", ".join(members)) # for (new, members) in changes]) # self.check(b, a) -# +# # def test_import_module_as(self): # for old in self.modules: # s = "import %s as foo" % old # 
self.warns_unchanged(s, "This module is now multiple modules") -# +# # def test_import_from_as(self): # for old, changes in self.modules.items(): # for new, members in changes: @@ -1685,12 +1737,12 @@ def assert_runs_after(self, *names): # b = "from %s import %s as blah, %s" % (old, member, member) # a = "from %s import %s as blah, %s" % (new, member, member) # self.check(b, a) -# +# # def test_star(self): # for old in self.modules: # s = "from %s import *" % old # self.warns_unchanged(s, "Cannot handle star imports") -# +# # def test_indented(self): # b = """ # def foo(): @@ -1702,7 +1754,7 @@ def assert_runs_after(self, *names): # from urllib.request import urlopen # """ # self.check(b, a) -# +# # b = """ # def foo(): # other() @@ -1715,9 +1767,9 @@ def assert_runs_after(self, *names): # from urllib.request import urlopen # """ # self.check(b, a) -# -# -# +# +# +# # def test_import_module_usage(self): # for old, changes in self.modules.items(): # for new, members in changes: @@ -1742,163 +1794,163 @@ def assert_runs_after(self, *names): # %s.%s(%s.%s) # """ % (new_import, new, member, new, member) # self.check(b, a) -# -# +# +# # class Test_input(FixerTestCase): # fixer = "input" -# +# # def test_prefix_preservation(self): # b = """x = input( )""" # a = """x = eval(input( ))""" # self.check(b, a) -# +# # b = """x = input( '' )""" # a = """x = eval(input( '' ))""" # self.check(b, a) -# +# # def test_trailing_comment(self): # b = """x = input() # foo""" # a = """x = eval(input()) # foo""" # self.check(b, a) -# +# # def test_idempotency(self): # s = """x = eval(input())""" # self.unchanged(s) -# +# # s = """x = eval(input(''))""" # self.unchanged(s) -# +# # s = """x = eval(input(foo(5) + 9))""" # self.unchanged(s) -# +# # def test_1(self): # b = """x = input()""" # a = """x = eval(input())""" # self.check(b, a) -# +# # def test_2(self): # b = """x = input('')""" # a = """x = eval(input(''))""" # self.check(b, a) -# +# # def test_3(self): # b = """x = 
input('prompt')""" # a = """x = eval(input('prompt'))""" # self.check(b, a) -# +# # def test_4(self): # b = """x = input(foo(5) + 9)""" # a = """x = eval(input(foo(5) + 9))""" # self.check(b, a) -# +# # class Test_tuple_params(FixerTestCase): # fixer = "tuple_params" -# +# # def test_unchanged_1(self): # s = """def foo(): pass""" # self.unchanged(s) -# +# # def test_unchanged_2(self): # s = """def foo(a, b, c): pass""" # self.unchanged(s) -# +# # def test_unchanged_3(self): # s = """def foo(a=3, b=4, c=5): pass""" # self.unchanged(s) -# +# # def test_1(self): # b = """ # def foo(((a, b), c)): # x = 5""" -# +# # a = """ # def foo(xxx_todo_changeme): # ((a, b), c) = xxx_todo_changeme # x = 5""" # self.check(b, a) -# +# # def test_2(self): # b = """ # def foo(((a, b), c), d): # x = 5""" -# +# # a = """ # def foo(xxx_todo_changeme, d): # ((a, b), c) = xxx_todo_changeme # x = 5""" # self.check(b, a) -# +# # def test_3(self): # b = """ # def foo(((a, b), c), d) -> e: # x = 5""" -# +# # a = """ # def foo(xxx_todo_changeme, d) -> e: # ((a, b), c) = xxx_todo_changeme # x = 5""" # self.check(b, a) -# +# # def test_semicolon(self): # b = """ # def foo(((a, b), c)): x = 5; y = 7""" -# +# # a = """ # def foo(xxx_todo_changeme): ((a, b), c) = xxx_todo_changeme; x = 5; y = 7""" # self.check(b, a) -# +# # def test_keywords(self): # b = """ # def foo(((a, b), c), d, e=5) -> z: # x = 5""" -# +# # a = """ # def foo(xxx_todo_changeme, d, e=5) -> z: # ((a, b), c) = xxx_todo_changeme # x = 5""" # self.check(b, a) -# +# # def test_varargs(self): # b = """ # def foo(((a, b), c), d, *vargs, **kwargs) -> z: # x = 5""" -# +# # a = """ # def foo(xxx_todo_changeme, d, *vargs, **kwargs) -> z: # ((a, b), c) = xxx_todo_changeme # x = 5""" # self.check(b, a) -# +# # def test_multi_1(self): # b = """ # def foo(((a, b), c), (d, e, f)) -> z: # x = 5""" -# +# # a = """ # def foo(xxx_todo_changeme, xxx_todo_changeme1) -> z: # ((a, b), c) = xxx_todo_changeme # (d, e, f) = xxx_todo_changeme1 # x = 5""" # 
self.check(b, a) -# +# # def test_multi_2(self): # b = """ # def foo(x, ((a, b), c), d, (e, f, g), y) -> z: # x = 5""" -# +# # a = """ # def foo(x, xxx_todo_changeme, d, xxx_todo_changeme1, y) -> z: # ((a, b), c) = xxx_todo_changeme # (e, f, g) = xxx_todo_changeme1 # x = 5""" # self.check(b, a) -# +# # def test_docstring(self): # b = """ # def foo(((a, b), c), (d, e, f)) -> z: # "foo foo foo foo" # x = 5""" -# +# # a = """ # def foo(xxx_todo_changeme, xxx_todo_changeme1) -> z: # "foo foo foo foo" @@ -1906,83 +1958,83 @@ def assert_runs_after(self, *names): # (d, e, f) = xxx_todo_changeme1 # x = 5""" # self.check(b, a) -# +# # def test_lambda_no_change(self): # s = """lambda x: x + 5""" # self.unchanged(s) -# +# # def test_lambda_parens_single_arg(self): # b = """lambda (x): x + 5""" # a = """lambda x: x + 5""" # self.check(b, a) -# +# # b = """lambda(x): x + 5""" # a = """lambda x: x + 5""" # self.check(b, a) -# +# # b = """lambda ((((x)))): x + 5""" # a = """lambda x: x + 5""" # self.check(b, a) -# +# # b = """lambda((((x)))): x + 5""" # a = """lambda x: x + 5""" # self.check(b, a) -# +# # def test_lambda_simple(self): # b = """lambda (x, y): x + f(y)""" # a = """lambda x_y: x_y[0] + f(x_y[1])""" # self.check(b, a) -# +# # b = """lambda(x, y): x + f(y)""" # a = """lambda x_y: x_y[0] + f(x_y[1])""" # self.check(b, a) -# +# # b = """lambda (((x, y))): x + f(y)""" # a = """lambda x_y: x_y[0] + f(x_y[1])""" # self.check(b, a) -# +# # b = """lambda(((x, y))): x + f(y)""" # a = """lambda x_y: x_y[0] + f(x_y[1])""" # self.check(b, a) -# +# # def test_lambda_one_tuple(self): # b = """lambda (x,): x + f(x)""" # a = """lambda x1: x1[0] + f(x1[0])""" # self.check(b, a) -# +# # b = """lambda (((x,))): x + f(x)""" # a = """lambda x1: x1[0] + f(x1[0])""" # self.check(b, a) -# +# # def test_lambda_simple_multi_use(self): # b = """lambda (x, y): x + x + f(x) + x""" # a = """lambda x_y: x_y[0] + x_y[0] + f(x_y[0]) + x_y[0]""" # self.check(b, a) -# +# # def 
test_lambda_simple_reverse(self): # b = """lambda (x, y): y + x""" # a = """lambda x_y: x_y[1] + x_y[0]""" # self.check(b, a) -# +# # def test_lambda_nested(self): # b = """lambda (x, (y, z)): x + y + z""" # a = """lambda x_y_z: x_y_z[0] + x_y_z[1][0] + x_y_z[1][1]""" # self.check(b, a) -# +# # b = """lambda (((x, (y, z)))): x + y + z""" # a = """lambda x_y_z: x_y_z[0] + x_y_z[1][0] + x_y_z[1][1]""" # self.check(b, a) -# +# # def test_lambda_nested_multi_use(self): # b = """lambda (x, (y, z)): x + y + f(y)""" # a = """lambda x_y_z: x_y_z[0] + x_y_z[1][0] + f(x_y_z[1][0])""" # self.check(b, a) -# +# # class Test_methodattrs(FixerTestCase): # fixer = "methodattrs" -# +# # attrs = ["func", "self", "class"] -# +# # def test(self): # for attr in self.attrs: # b = "a.im_%s" % attr @@ -1991,58 +2043,58 @@ def assert_runs_after(self, *names): # else: # a = "a.__%s__" % attr # self.check(b, a) -# +# # b = "self.foo.im_%s.foo_bar" % attr # if attr == "class": # a = "self.foo.__self__.__class__.foo_bar" # else: # a = "self.foo.__%s__.foo_bar" % attr # self.check(b, a) -# +# # def test_unchanged(self): # for attr in self.attrs: # s = "foo(im_%s + 5)" % attr # self.unchanged(s) -# +# # s = "f(foo.__%s__)" % attr # self.unchanged(s) -# +# # s = "f(foo.__%s__.foo)" % attr # self.unchanged(s) -# +# # class Test_next(FixerTestCase): # fixer = "next" -# +# # def test_1(self): # b = """it.next()""" # a = """next(it)""" # self.check(b, a) -# +# # def test_2(self): # b = """a.b.c.d.next()""" # a = """next(a.b.c.d)""" # self.check(b, a) -# +# # def test_3(self): # b = """(a + b).next()""" # a = """next((a + b))""" # self.check(b, a) -# +# # def test_4(self): # b = """a().next()""" # a = """next(a())""" # self.check(b, a) -# +# # def test_5(self): # b = """a().next() + b""" # a = """next(a()) + b""" # self.check(b, a) -# +# # def test_6(self): # b = """c( a().next() + b)""" # a = """c( next(a()) + b)""" # self.check(b, a) -# +# # def test_prefix_preservation_1(self): # b = """ # for a in 
b: @@ -2055,7 +2107,7 @@ def assert_runs_after(self, *names): # next(a) # """ # self.check(b, a) -# +# # def test_prefix_preservation_2(self): # b = """ # for a in b: @@ -2070,7 +2122,7 @@ def assert_runs_after(self, *names): # next(a) # """ # self.check(b, a) -# +# # def test_prefix_preservation_3(self): # b = """ # next = 5 @@ -2085,7 +2137,7 @@ def assert_runs_after(self, *names): # a.__next__() # """ # self.check(b, a, ignore_warnings=True) -# +# # def test_prefix_preservation_4(self): # b = """ # next = 5 @@ -2102,7 +2154,7 @@ def assert_runs_after(self, *names): # a.__next__() # """ # self.check(b, a, ignore_warnings=True) -# +# # def test_prefix_preservation_5(self): # b = """ # next = 5 @@ -2117,7 +2169,7 @@ def assert_runs_after(self, *names): # a.__next__()) # """ # self.check(b, a, ignore_warnings=True) -# +# # def test_prefix_preservation_6(self): # b = """ # for a in b: @@ -2130,7 +2182,7 @@ def assert_runs_after(self, *names): # next(a)) # """ # self.check(b, a) -# +# # def test_method_1(self): # b = """ # class A: @@ -2143,7 +2195,7 @@ def assert_runs_after(self, *names): # pass # """ # self.check(b, a) -# +# # def test_method_2(self): # b = """ # class A(object): @@ -2156,7 +2208,7 @@ def assert_runs_after(self, *names): # pass # """ # self.check(b, a) -# +# # def test_method_3(self): # b = """ # class A: @@ -2169,16 +2221,16 @@ def assert_runs_after(self, *names): # pass # """ # self.check(b, a) -# +# # def test_method_4(self): # b = """ # class A: # def __init__(self, foo): # self.foo = foo -# +# # def next(self): # pass -# +# # def __iter__(self): # return self # """ @@ -2186,15 +2238,15 @@ def assert_runs_after(self, *names): # class A: # def __init__(self, foo): # self.foo = foo -# +# # def __next__(self): # pass -# +# # def __iter__(self): # return self # """ # self.check(b, a) -# +# # def test_method_unchanged(self): # s = """ # class A: @@ -2202,227 +2254,227 @@ def assert_runs_after(self, *names): # pass # """ # self.unchanged(s) -# +# # 
def test_shadowing_assign_simple(self): # s = """ # next = foo -# +# # class A: # def next(self, a, b): # pass # """ # self.warns_unchanged(s, "Calls to builtin next() possibly shadowed") -# +# # def test_shadowing_assign_tuple_1(self): # s = """ # (next, a) = foo -# +# # class A: # def next(self, a, b): # pass # """ # self.warns_unchanged(s, "Calls to builtin next() possibly shadowed") -# +# # def test_shadowing_assign_tuple_2(self): # s = """ # (a, (b, (next, c)), a) = foo -# +# # class A: # def next(self, a, b): # pass # """ # self.warns_unchanged(s, "Calls to builtin next() possibly shadowed") -# +# # def test_shadowing_assign_list_1(self): # s = """ # [next, a] = foo -# +# # class A: # def next(self, a, b): # pass # """ # self.warns_unchanged(s, "Calls to builtin next() possibly shadowed") -# +# # def test_shadowing_assign_list_2(self): # s = """ # [a, [b, [next, c]], a] = foo -# +# # class A: # def next(self, a, b): # pass # """ # self.warns_unchanged(s, "Calls to builtin next() possibly shadowed") -# +# # def test_builtin_assign(self): # s = """ # def foo(): # __builtin__.next = foo -# +# # class A: # def next(self, a, b): # pass # """ # self.warns_unchanged(s, "Calls to builtin next() possibly shadowed") -# +# # def test_builtin_assign_in_tuple(self): # s = """ # def foo(): # (a, __builtin__.next) = foo -# +# # class A: # def next(self, a, b): # pass # """ # self.warns_unchanged(s, "Calls to builtin next() possibly shadowed") -# +# # def test_builtin_assign_in_list(self): # s = """ # def foo(): # [a, __builtin__.next] = foo -# +# # class A: # def next(self, a, b): # pass # """ # self.warns_unchanged(s, "Calls to builtin next() possibly shadowed") -# +# # def test_assign_to_next(self): # s = """ # def foo(): # A.next = foo -# +# # class A: # def next(self, a, b): # pass # """ # self.unchanged(s) -# +# # def test_assign_to_next_in_tuple(self): # s = """ # def foo(): # (a, A.next) = foo -# +# # class A: # def next(self, a, b): # pass # """ # self.unchanged(s) 
-# +# # def test_assign_to_next_in_list(self): # s = """ # def foo(): # [a, A.next] = foo -# +# # class A: # def next(self, a, b): # pass # """ # self.unchanged(s) -# +# # def test_shadowing_import_1(self): # s = """ # import foo.bar as next -# +# # class A: # def next(self, a, b): # pass # """ # self.warns_unchanged(s, "Calls to builtin next() possibly shadowed") -# +# # def test_shadowing_import_2(self): # s = """ # import bar, bar.foo as next -# +# # class A: # def next(self, a, b): # pass # """ # self.warns_unchanged(s, "Calls to builtin next() possibly shadowed") -# +# # def test_shadowing_import_3(self): # s = """ # import bar, bar.foo as next, baz -# +# # class A: # def next(self, a, b): # pass # """ # self.warns_unchanged(s, "Calls to builtin next() possibly shadowed") -# +# # def test_shadowing_import_from_1(self): # s = """ # from x import next -# +# # class A: # def next(self, a, b): # pass # """ # self.warns_unchanged(s, "Calls to builtin next() possibly shadowed") -# +# # def test_shadowing_import_from_2(self): # s = """ # from x.a import next -# +# # class A: # def next(self, a, b): # pass # """ # self.warns_unchanged(s, "Calls to builtin next() possibly shadowed") -# +# # def test_shadowing_import_from_3(self): # s = """ # from x import a, next, b -# +# # class A: # def next(self, a, b): # pass # """ # self.warns_unchanged(s, "Calls to builtin next() possibly shadowed") -# +# # def test_shadowing_import_from_4(self): # s = """ # from x.a import a, next, b -# +# # class A: # def next(self, a, b): # pass # """ # self.warns_unchanged(s, "Calls to builtin next() possibly shadowed") -# +# # def test_shadowing_funcdef_1(self): # s = """ # def next(a): # pass -# +# # class A: # def next(self, a, b): # pass # """ # self.warns_unchanged(s, "Calls to builtin next() possibly shadowed") -# +# # def test_shadowing_funcdef_2(self): # b = """ # def next(a): # pass -# +# # class A: # def next(self): # pass -# +# # it.next() # """ # a = """ # def next(a): # pass -# 
+# # class A: # def __next__(self): # pass -# +# # it.__next__() # """ # self.warns(b, a, "Calls to builtin next() possibly shadowed") -# +# # def test_shadowing_global_1(self): # s = """ # def f(): @@ -2430,7 +2482,7 @@ def assert_runs_after(self, *names): # next = 5 # """ # self.warns_unchanged(s, "Calls to builtin next() possibly shadowed") -# +# # def test_shadowing_global_2(self): # s = """ # def f(): @@ -2438,55 +2490,55 @@ def assert_runs_after(self, *names): # next = 5 # """ # self.warns_unchanged(s, "Calls to builtin next() possibly shadowed") -# +# # def test_shadowing_for_simple(self): # s = """ # for next in it(): # pass -# +# # b = 5 # c = 6 # """ # self.warns_unchanged(s, "Calls to builtin next() possibly shadowed") -# +# # def test_shadowing_for_tuple_1(self): # s = """ # for next, b in it(): # pass -# +# # b = 5 # c = 6 # """ # self.warns_unchanged(s, "Calls to builtin next() possibly shadowed") -# +# # def test_shadowing_for_tuple_2(self): # s = """ # for a, (next, c), b in it(): # pass -# +# # b = 5 # c = 6 # """ # self.warns_unchanged(s, "Calls to builtin next() possibly shadowed") -# +# # def test_noncall_access_1(self): # b = """gnext = g.next""" # a = """gnext = g.__next__""" # self.check(b, a) -# +# # def test_noncall_access_2(self): # b = """f(g.next + 5)""" # a = """f(g.__next__ + 5)""" # self.check(b, a) -# +# # def test_noncall_access_3(self): # b = """f(g().next + 5)""" # a = """f(g().__next__ + 5)""" # self.check(b, a) -# +# # class Test_nonzero(FixerTestCase): # fixer = "nonzero" -# +# # def test_1(self): # b = """ # class A: @@ -2499,7 +2551,7 @@ def assert_runs_after(self, *names): # pass # """ # self.check(b, a) -# +# # def test_2(self): # b = """ # class A(object): @@ -2512,7 +2564,7 @@ def assert_runs_after(self, *names): # pass # """ # self.check(b, a) -# +# # def test_unchanged_1(self): # s = """ # class A(object): @@ -2520,7 +2572,7 @@ def assert_runs_after(self, *names): # pass # """ # self.unchanged(s) -# +# # def 
test_unchanged_2(self): # s = """ # class A(object): @@ -2528,101 +2580,101 @@ def assert_runs_after(self, *names): # pass # """ # self.unchanged(s) -# +# # def test_unchanged_func(self): # s = """ # def __nonzero__(self): # pass # """ # self.unchanged(s) -# +# # class Test_numliterals(FixerTestCase): # fixer = "numliterals" -# +# # def test_octal_1(self): # b = """0755""" # a = """0o755""" # self.check(b, a) -# +# # def test_long_int_1(self): # b = """a = 12L""" # a = """a = 12""" # self.check(b, a) -# +# # def test_long_int_2(self): # b = """a = 12l""" # a = """a = 12""" # self.check(b, a) -# +# # def test_long_hex(self): # b = """b = 0x12l""" # a = """b = 0x12""" # self.check(b, a) -# +# # def test_comments_and_spacing(self): # b = """b = 0x12L""" # a = """b = 0x12""" # self.check(b, a) -# +# # b = """b = 0755 # spam""" # a = """b = 0o755 # spam""" # self.check(b, a) -# +# # def test_unchanged_int(self): # s = """5""" # self.unchanged(s) -# +# # def test_unchanged_float(self): # s = """5.0""" # self.unchanged(s) -# +# # def test_unchanged_octal(self): # s = """0o755""" # self.unchanged(s) -# +# # def test_unchanged_hex(self): # s = """0xABC""" # self.unchanged(s) -# +# # def test_unchanged_exp(self): # s = """5.0e10""" # self.unchanged(s) -# +# # def test_unchanged_complex_int(self): # s = """5 + 4j""" # self.unchanged(s) -# +# # def test_unchanged_complex_float(self): # s = """5.4 + 4.9j""" # self.unchanged(s) -# +# # def test_unchanged_complex_bare(self): # s = """4j""" # self.unchanged(s) # s = """4.4j""" # self.unchanged(s) -# +# # class Test_renames(FixerTestCase): # fixer = "renames" -# +# # modules = {"sys": ("maxint", "maxsize"), # } -# +# # def test_import_from(self): # for mod, (old, new) in self.modules.items(): # b = "from %s import %s" % (mod, old) # a = "from %s import %s" % (mod, new) # self.check(b, a) -# +# # s = "from foo import %s" % old # self.unchanged(s) -# +# # def test_import_from_as(self): # for mod, (old, new) in self.modules.items(): # 
b = "from %s import %s as foo_bar" % (mod, old) # a = "from %s import %s as foo_bar" % (mod, new) # self.check(b, a) -# +# # def test_import_module_usage(self): # for mod, (old, new) in self.modules.items(): # b = """ @@ -2634,7 +2686,7 @@ def assert_runs_after(self, *names): # foo(%s, %s.%s) # """ % (mod, mod, mod, new) # self.check(b, a) -# +# # def XXX_test_from_import_usage(self): # # not implemented yet # for mod, (old, new) in self.modules.items(): @@ -2647,66 +2699,66 @@ def assert_runs_after(self, *names): # foo(%s, %s) # """ % (mod, new, mod, new) # self.check(b, a) -# +# # class Test_unicode(FixerTestCase): # fixer = "unicode" -# +# # def test_whitespace(self): # b = """unicode( x)""" # a = """str( x)""" # self.check(b, a) -# +# # b = """ unicode(x )""" # a = """ str(x )""" # self.check(b, a) -# +# # b = """ u'h'""" # a = """ 'h'""" # self.check(b, a) -# +# # def test_unicode_call(self): # b = """unicode(x, y, z)""" # a = """str(x, y, z)""" # self.check(b, a) -# +# # def test_unichr(self): # b = """unichr(u'h')""" # a = """chr('h')""" # self.check(b, a) -# +# # def test_unicode_literal_1(self): # b = '''u"x"''' # a = '''"x"''' # self.check(b, a) -# +# # def test_unicode_literal_2(self): # b = """ur'x'""" # a = """r'x'""" # self.check(b, a) -# +# # def test_unicode_literal_3(self): # b = """UR'''x''' """ # a = """R'''x''' """ # self.check(b, a) -# +# # class Test_callable(FixerTestCase): # fixer = "callable" -# +# # def test_prefix_preservation(self): # b = """callable( x)""" # a = """import collections\nisinstance( x, collections.Callable)""" # self.check(b, a) -# +# # b = """if callable(x): pass""" # a = """import collections # if isinstance(x, collections.Callable): pass""" # self.check(b, a) -# +# # def test_callable_call(self): # b = """callable(x)""" # a = """import collections\nisinstance(x, collections.Callable)""" # self.check(b, a) -# +# # def test_global_import(self): # b = """ # def spam(foo): @@ -2716,14 +2768,14 @@ def assert_runs_after(self, 
*names): # def spam(foo): # isinstance(foo, collections.Callable)"""[1:] # self.check(b, a) -# +# # b = """ # import collections # def spam(foo): # callable(foo)"""[1:] # # same output if it was already imported # self.check(b, a) -# +# # b = """ # from collections import * # def spam(foo): @@ -2734,7 +2786,7 @@ def assert_runs_after(self, *names): # def spam(foo): # isinstance(foo, collections.Callable)"""[1:] # self.check(b, a) -# +# # b = """ # do_stuff() # do_some_other_stuff() @@ -2745,7 +2797,7 @@ def assert_runs_after(self, *names): # do_some_other_stuff() # assert isinstance(do_stuff, collections.Callable)"""[1:] # self.check(b, a) -# +# # b = """ # if isinstance(do_stuff, Callable): # assert callable(do_stuff) @@ -2768,55 +2820,55 @@ def assert_runs_after(self, *names): # else: # assert not isinstance(do_stuff, collections.Callable)"""[1:] # self.check(b, a) -# +# # def test_callable_should_not_change(self): # a = """callable(*x)""" # self.unchanged(a) -# +# # a = """callable(x, y)""" # self.unchanged(a) -# +# # a = """callable(x, kw=y)""" # self.unchanged(a) -# +# # a = """callable()""" # self.unchanged(a) -# +# # class Test_filter(FixerTestCase): # fixer = "filter" -# +# # def test_prefix_preservation(self): # b = """x = filter( foo, 'abc' )""" # a = """x = list(filter( foo, 'abc' ))""" # self.check(b, a) -# +# # b = """x = filter( None , 'abc' )""" # a = """x = [_f for _f in 'abc' if _f]""" # self.check(b, a) -# +# # def test_filter_basic(self): # b = """x = filter(None, 'abc')""" # a = """x = [_f for _f in 'abc' if _f]""" # self.check(b, a) -# +# # b = """x = len(filter(f, 'abc'))""" # a = """x = len(list(filter(f, 'abc')))""" # self.check(b, a) -# +# # b = """x = filter(lambda x: x%2 == 0, range(10))""" # a = """x = [x for x in range(10) if x%2 == 0]""" # self.check(b, a) -# +# # # Note the parens around x # b = """x = filter(lambda (x): x%2 == 0, range(10))""" # a = """x = [x for x in range(10) if x%2 == 0]""" # self.check(b, a) -# +# # # XXX This 
(rare) case is not supported # ## b = """x = filter(f, 'abc')[0]""" # ## a = """x = list(filter(f, 'abc'))[0]""" # ## self.check(b, a) -# +# # def test_filter_nochange(self): # a = """b.join(filter(f, 'abc'))""" # self.unchanged(a) @@ -2856,62 +2908,62 @@ def assert_runs_after(self, *names): # self.unchanged(a) # a = """(x for x in filter(f, 'abc'))""" # self.unchanged(a) -# +# # def test_future_builtins(self): # a = "from future_builtins import spam, filter; filter(f, 'ham')" # self.unchanged(a) -# +# # b = """from future_builtins import spam; x = filter(f, 'abc')""" # a = """from future_builtins import spam; x = list(filter(f, 'abc'))""" # self.check(b, a) -# +# # a = "from future_builtins import *; filter(f, 'ham')" # self.unchanged(a) -# +# # class Test_map(FixerTestCase): # fixer = "map" -# +# # def check(self, b, a): # self.unchanged("from future_builtins import map; " + b, a) # super(Test_map, self).check(b, a) -# +# # def test_prefix_preservation(self): # b = """x = map( f, 'abc' )""" # a = """x = list(map( f, 'abc' ))""" # self.check(b, a) -# +# # def test_trailing_comment(self): # b = """x = map(f, 'abc') # foo""" # a = """x = list(map(f, 'abc')) # foo""" # self.check(b, a) -# +# # def test_None_with_multiple_arguments(self): # s = """x = map(None, a, b, c)""" # self.warns_unchanged(s, "cannot convert map(None, ...) 
with " # "multiple arguments") -# +# # def test_map_basic(self): # b = """x = map(f, 'abc')""" # a = """x = list(map(f, 'abc'))""" # self.check(b, a) -# +# # b = """x = len(map(f, 'abc', 'def'))""" # a = """x = len(list(map(f, 'abc', 'def')))""" # self.check(b, a) -# +# # b = """x = map(None, 'abc')""" # a = """x = list('abc')""" # self.check(b, a) -# +# # b = """x = map(lambda x: x+1, range(4))""" # a = """x = [x+1 for x in range(4)]""" # self.check(b, a) -# +# # # Note the parens around x # b = """x = map(lambda (x): x+1, range(4))""" # a = """x = [x+1 for x in range(4)]""" # self.check(b, a) -# +# # b = """ # foo() # # foo @@ -2923,12 +2975,12 @@ def assert_runs_after(self, *names): # list(map(f, x)) # """ # self.warns(b, a, "You should use a for loop here") -# +# # # XXX This (rare) case is not supported # ## b = """x = map(f, 'abc')[0]""" # ## a = """x = list(map(f, 'abc'))[0]""" # ## self.check(b, a) -# +# # def test_map_nochange(self): # a = """b.join(map(f, 'abc'))""" # self.unchanged(a) @@ -2968,34 +3020,34 @@ def assert_runs_after(self, *names): # self.unchanged(a) # a = """(x for x in map(f, 'abc'))""" # self.unchanged(a) -# +# # def test_future_builtins(self): # a = "from future_builtins import spam, map, eggs; map(f, 'ham')" # self.unchanged(a) -# +# # b = """from future_builtins import spam, eggs; x = map(f, 'abc')""" # a = """from future_builtins import spam, eggs; x = list(map(f, 'abc'))""" # self.check(b, a) -# +# # a = "from future_builtins import *; map(f, 'ham')" # self.unchanged(a) -# +# # class Test_zip(FixerTestCase): # fixer = "zip" -# +# # def check(self, b, a): # self.unchanged("from future_builtins import zip; " + b, a) # super(Test_zip, self).check(b, a) -# +# # def test_zip_basic(self): # b = """x = zip(a, b, c)""" # a = """x = list(zip(a, b, c))""" # self.check(b, a) -# +# # b = """x = len(zip(a, b))""" # a = """x = len(list(zip(a, b)))""" # self.check(b, a) -# +# # def test_zip_nochange(self): # a = """b.join(zip(a, b))""" # 
self.unchanged(a) @@ -3035,74 +3087,74 @@ def assert_runs_after(self, *names): # self.unchanged(a) # a = """(x for x in zip(a, b))""" # self.unchanged(a) -# +# # def test_future_builtins(self): # a = "from future_builtins import spam, zip, eggs; zip(a, b)" # self.unchanged(a) -# +# # b = """from future_builtins import spam, eggs; x = zip(a, b)""" # a = """from future_builtins import spam, eggs; x = list(zip(a, b))""" # self.check(b, a) -# +# # a = "from future_builtins import *; zip(a, b)" # self.unchanged(a) -# +# # class Test_standarderror(FixerTestCase): # fixer = "standarderror" -# +# # def test(self): # b = """x = StandardError()""" # a = """x = Exception()""" # self.check(b, a) -# +# # b = """x = StandardError(a, b, c)""" # a = """x = Exception(a, b, c)""" # self.check(b, a) -# +# # b = """f(2 + StandardError(a, b, c))""" # a = """f(2 + Exception(a, b, c))""" # self.check(b, a) -# +# # class Test_types(FixerTestCase): # fixer = "types" -# +# # def test_basic_types_convert(self): # b = """types.StringType""" # a = """bytes""" # self.check(b, a) -# +# # b = """types.DictType""" # a = """dict""" # self.check(b, a) -# +# # b = """types . 
IntType""" # a = """int""" # self.check(b, a) -# +# # b = """types.ListType""" # a = """list""" # self.check(b, a) -# +# # b = """types.LongType""" # a = """int""" # self.check(b, a) -# +# # b = """types.NoneType""" # a = """type(None)""" # self.check(b, a) -# +# # class Test_idioms(FixerTestCase): # fixer = "idioms" -# +# # def test_while(self): # b = """while 1: foo()""" # a = """while True: foo()""" # self.check(b, a) -# +# # b = """while 1: foo()""" # a = """while True: foo()""" # self.check(b, a) -# +# # b = """ # while 1: # foo() @@ -3112,132 +3164,132 @@ def assert_runs_after(self, *names): # foo() # """ # self.check(b, a) -# +# # def test_while_unchanged(self): # s = """while 11: foo()""" # self.unchanged(s) -# +# # s = """while 0: foo()""" # self.unchanged(s) -# +# # s = """while foo(): foo()""" # self.unchanged(s) -# +# # s = """while []: foo()""" # self.unchanged(s) -# +# # def test_eq_simple(self): # b = """type(x) == T""" # a = """isinstance(x, T)""" # self.check(b, a) -# +# # b = """if type(x) == T: pass""" # a = """if isinstance(x, T): pass""" # self.check(b, a) -# +# # def test_eq_reverse(self): # b = """T == type(x)""" # a = """isinstance(x, T)""" # self.check(b, a) -# +# # b = """if T == type(x): pass""" # a = """if isinstance(x, T): pass""" # self.check(b, a) -# +# # def test_eq_expression(self): # b = """type(x+y) == d.get('T')""" # a = """isinstance(x+y, d.get('T'))""" # self.check(b, a) -# +# # b = """type( x + y) == d.get('T')""" # a = """isinstance(x + y, d.get('T'))""" # self.check(b, a) -# +# # def test_is_simple(self): # b = """type(x) is T""" # a = """isinstance(x, T)""" # self.check(b, a) -# +# # b = """if type(x) is T: pass""" # a = """if isinstance(x, T): pass""" # self.check(b, a) -# +# # def test_is_reverse(self): # b = """T is type(x)""" # a = """isinstance(x, T)""" # self.check(b, a) -# +# # b = """if T is type(x): pass""" # a = """if isinstance(x, T): pass""" # self.check(b, a) -# +# # def test_is_expression(self): # b = 
"""type(x+y) is d.get('T')""" # a = """isinstance(x+y, d.get('T'))""" # self.check(b, a) -# +# # b = """type( x + y) is d.get('T')""" # a = """isinstance(x + y, d.get('T'))""" # self.check(b, a) -# +# # def test_is_not_simple(self): # b = """type(x) is not T""" # a = """not isinstance(x, T)""" # self.check(b, a) -# +# # b = """if type(x) is not T: pass""" # a = """if not isinstance(x, T): pass""" # self.check(b, a) -# +# # def test_is_not_reverse(self): # b = """T is not type(x)""" # a = """not isinstance(x, T)""" # self.check(b, a) -# +# # b = """if T is not type(x): pass""" # a = """if not isinstance(x, T): pass""" # self.check(b, a) -# +# # def test_is_not_expression(self): # b = """type(x+y) is not d.get('T')""" # a = """not isinstance(x+y, d.get('T'))""" # self.check(b, a) -# +# # b = """type( x + y) is not d.get('T')""" # a = """not isinstance(x + y, d.get('T'))""" # self.check(b, a) -# +# # def test_ne_simple(self): # b = """type(x) != T""" # a = """not isinstance(x, T)""" # self.check(b, a) -# +# # b = """if type(x) != T: pass""" # a = """if not isinstance(x, T): pass""" # self.check(b, a) -# +# # def test_ne_reverse(self): # b = """T != type(x)""" # a = """not isinstance(x, T)""" # self.check(b, a) -# +# # b = """if T != type(x): pass""" # a = """if not isinstance(x, T): pass""" # self.check(b, a) -# +# # def test_ne_expression(self): # b = """type(x+y) != d.get('T')""" # a = """not isinstance(x+y, d.get('T'))""" # self.check(b, a) -# +# # b = """type( x + y) != d.get('T')""" # a = """not isinstance(x + y, d.get('T'))""" # self.check(b, a) -# +# # def test_type_unchanged(self): # a = """type(x).__name__""" # self.unchanged(a) -# +# # def test_sort_list_call(self): # b = """ # v = list(t) @@ -3249,7 +3301,7 @@ def assert_runs_after(self, *names): # foo(v) # """ # self.check(b, a) -# +# # b = """ # v = list(foo(b) + d) # v.sort() @@ -3260,7 +3312,7 @@ def assert_runs_after(self, *names): # foo(v) # """ # self.check(b, a) -# +# # b = """ # while x: # v = 
list(t) @@ -3273,7 +3325,7 @@ def assert_runs_after(self, *names): # foo(v) # """ # self.check(b, a) -# +# # b = """ # v = list(t) # # foo @@ -3286,7 +3338,7 @@ def assert_runs_after(self, *names): # foo(v) # """ # self.check(b, a) -# +# # b = r""" # v = list( t) # v.sort() @@ -3297,21 +3349,21 @@ def assert_runs_after(self, *names): # foo(v) # """ # self.check(b, a) -# +# # b = r""" # try: # m = list(s) # m.sort() # except: pass # """ -# +# # a = r""" # try: # m = sorted(s) # except: pass # """ # self.check(b, a) -# +# # b = r""" # try: # m = list(s) @@ -3319,7 +3371,7 @@ def assert_runs_after(self, *names): # m.sort() # except: pass # """ -# +# # a = r""" # try: # m = sorted(s) @@ -3327,17 +3379,17 @@ def assert_runs_after(self, *names): # except: pass # """ # self.check(b, a) -# +# # b = r""" # m = list(s) # # more comments # m.sort()""" -# +# # a = r""" # m = sorted(s) # # more comments""" # self.check(b, a) -# +# # def test_sort_simple_expr(self): # b = """ # v = t @@ -3349,7 +3401,7 @@ def assert_runs_after(self, *names): # foo(v) # """ # self.check(b, a) -# +# # b = """ # v = foo(b) # v.sort() @@ -3360,7 +3412,7 @@ def assert_runs_after(self, *names): # foo(v) # """ # self.check(b, a) -# +# # b = """ # v = b.keys() # v.sort() @@ -3371,7 +3423,7 @@ def assert_runs_after(self, *names): # foo(v) # """ # self.check(b, a) -# +# # b = """ # v = foo(b) + d # v.sort() @@ -3382,7 +3434,7 @@ def assert_runs_after(self, *names): # foo(v) # """ # self.check(b, a) -# +# # b = """ # while x: # v = t @@ -3395,7 +3447,7 @@ def assert_runs_after(self, *names): # foo(v) # """ # self.check(b, a) -# +# # b = """ # v = t # # foo @@ -3408,7 +3460,7 @@ def assert_runs_after(self, *names): # foo(v) # """ # self.check(b, a) -# +# # b = r""" # v = t # v.sort() @@ -3419,7 +3471,7 @@ def assert_runs_after(self, *names): # foo(v) # """ # self.check(b, a) -# +# # def test_sort_unchanged(self): # s = """ # v = list(t) @@ -3427,57 +3479,57 @@ def assert_runs_after(self, *names): # foo(w) # 
""" # self.unchanged(s) -# +# # s = """ # v = list(t) # v.sort(u) # foo(v) # """ # self.unchanged(s) -# +# # class Test_basestring(FixerTestCase): # fixer = "basestring" -# +# # def test_basestring(self): # b = """isinstance(x, basestring)""" # a = """isinstance(x, str)""" # self.check(b, a) -# +# # class Test_buffer(FixerTestCase): # fixer = "buffer" -# +# # def test_buffer(self): # b = """x = buffer(y)""" # a = """x = memoryview(y)""" # self.check(b, a) -# +# # def test_slicing(self): # b = """buffer(y)[4:5]""" # a = """memoryview(y)[4:5]""" # self.check(b, a) -# +# # class Test_future(FixerTestCase): # fixer = "future" -# +# # def test_future(self): # b = """from __future__ import braces""" # a = """""" # self.check(b, a) -# +# # b = """# comment\nfrom __future__ import braces""" # a = """# comment\n""" # self.check(b, a) -# +# # b = """from __future__ import braces\n# comment""" # a = """\n# comment""" # self.check(b, a) -# +# # def test_run_order(self): # self.assert_runs_after('print') -# +# # class Test_itertools(FixerTestCase): # fixer = "itertools" -# +# # def checkall(self, before, after): # # Because we need to check with and without the itertools prefix # # and on each of the three functions, these loops make it all @@ -3487,132 +3539,132 @@ def assert_runs_after(self, *names): # b = before %(i+'i'+f) # a = after %(f) # self.check(b, a) -# +# # def test_0(self): # # A simple example -- test_1 covers exactly the same thing, # # but it's not quite as clear. 
# b = "itertools.izip(a, b)" # a = "zip(a, b)" # self.check(b, a) -# +# # def test_1(self): # b = """%s(f, a)""" # a = """%s(f, a)""" # self.checkall(b, a) -# +# # def test_qualified(self): # b = """itertools.ifilterfalse(a, b)""" # a = """itertools.filterfalse(a, b)""" # self.check(b, a) -# +# # b = """itertools.izip_longest(a, b)""" # a = """itertools.zip_longest(a, b)""" # self.check(b, a) -# +# # def test_2(self): # b = """ifilterfalse(a, b)""" # a = """filterfalse(a, b)""" # self.check(b, a) -# +# # b = """izip_longest(a, b)""" # a = """zip_longest(a, b)""" # self.check(b, a) -# +# # def test_space_1(self): # b = """ %s(f, a)""" # a = """ %s(f, a)""" # self.checkall(b, a) -# +# # def test_space_2(self): # b = """ itertools.ifilterfalse(a, b)""" # a = """ itertools.filterfalse(a, b)""" # self.check(b, a) -# +# # b = """ itertools.izip_longest(a, b)""" # a = """ itertools.zip_longest(a, b)""" # self.check(b, a) -# +# # def test_run_order(self): # self.assert_runs_after('map', 'zip', 'filter') -# -# +# +# # class Test_itertools_imports(FixerTestCase): # fixer = 'itertools_imports' -# +# # def test_reduced(self): # b = "from itertools import imap, izip, foo" # a = "from itertools import foo" # self.check(b, a) -# +# # b = "from itertools import bar, imap, izip, foo" # a = "from itertools import bar, foo" # self.check(b, a) -# +# # b = "from itertools import chain, imap, izip" # a = "from itertools import chain" # self.check(b, a) -# +# # def test_comments(self): # b = "#foo\nfrom itertools import imap, izip" # a = "#foo\n" # self.check(b, a) -# +# # def test_none(self): # b = "from itertools import imap, izip" # a = "" # self.check(b, a) -# +# # b = "from itertools import izip" # a = "" # self.check(b, a) -# +# # def test_import_as(self): # b = "from itertools import izip, bar as bang, imap" # a = "from itertools import bar as bang" # self.check(b, a) -# +# # b = "from itertools import izip as _zip, imap, bar" # a = "from itertools import bar" # self.check(b, a) 
-# +# # b = "from itertools import imap as _map" # a = "" # self.check(b, a) -# +# # b = "from itertools import imap as _map, izip as _zip" # a = "" # self.check(b, a) -# +# # s = "from itertools import bar as bang" # self.unchanged(s) -# +# # def test_ifilter_and_zip_longest(self): # for name in "filterfalse", "zip_longest": # b = "from itertools import i%s" % (name,) # a = "from itertools import %s" % (name,) # self.check(b, a) -# +# # b = "from itertools import imap, i%s, foo" % (name,) # a = "from itertools import %s, foo" % (name,) # self.check(b, a) -# +# # b = "from itertools import bar, i%s, foo" % (name,) # a = "from itertools import bar, %s, foo" % (name,) # self.check(b, a) -# +# # def test_import_star(self): # s = "from itertools import *" # self.unchanged(s) -# -# +# +# # def test_unchanged(self): # s = "from itertools import foo" # self.unchanged(s) -# -# +# +# # class Test_import(FixerTestCase): # fixer = "import" -# +# # def setUp(self): # super(Test_import, self).setUp() # # Need to replace fix_import's exists method @@ -3623,145 +3675,145 @@ def assert_runs_after(self, *names): # def fake_exists(name): # self.files_checked.append(name) # return self.always_exists or (name in self.present_files) -# +# # from lib2to3.fixes import fix_import # fix_import.exists = fake_exists -# +# # def tearDown(self): # from lib2to3.fixes import fix_import # fix_import.exists = os.path.exists -# +# # def check_both(self, b, a): # self.always_exists = True # super(Test_import, self).check(b, a) # self.always_exists = False # super(Test_import, self).unchanged(b) -# +# # def test_files_checked(self): # def p(path): # # Takes a unix path and returns a path with correct separators # return os.path.pathsep.join(path.split("/")) -# +# # self.always_exists = False # self.present_files = set(['__init__.py']) # expected_extensions = ('.py', os.path.sep, '.pyc', '.so', '.sl', '.pyd') # names_to_test = (p("/spam/eggs.py"), "ni.py", p("../../shrubbery.py")) -# +# # for name in 
names_to_test: # self.files_checked = [] # self.filename = name # self.unchanged("import jam") -# +# # if os.path.dirname(name): # name = os.path.dirname(name) + '/jam' # else: # name = 'jam' # expected_checks = set(name + ext for ext in expected_extensions) # expected_checks.add("__init__.py") -# +# # self.assertEqual(set(self.files_checked), expected_checks) -# +# # def test_not_in_package(self): # s = "import bar" # self.always_exists = False # self.present_files = set(["bar.py"]) # self.unchanged(s) -# +# # def test_with_absolute_import_enabled(self): # s = "from __future__ import absolute_import\nimport bar" # self.always_exists = False # self.present_files = set(["__init__.py", "bar.py"]) # self.unchanged(s) -# +# # def test_in_package(self): # b = "import bar" # a = "from . import bar" # self.always_exists = False # self.present_files = set(["__init__.py", "bar.py"]) # self.check(b, a) -# +# # def test_import_from_package(self): # b = "import bar" # a = "from . import bar" # self.always_exists = False # self.present_files = set(["__init__.py", "bar" + os.path.sep]) # self.check(b, a) -# +# # def test_already_relative_import(self): # s = "from . import bar" # self.unchanged(s) -# +# # def test_comments_and_indent(self): # b = "import bar # Foo" # a = "from . import bar # Foo" # self.check(b, a) -# +# # def test_from(self): # b = "from foo import bar, baz" # a = "from .foo import bar, baz" # self.check_both(b, a) -# +# # b = "from foo import bar" # a = "from .foo import bar" # self.check_both(b, a) -# +# # b = "from foo import (bar, baz)" # a = "from .foo import (bar, baz)" # self.check_both(b, a) -# +# # def test_dotted_from(self): # b = "from green.eggs import ham" # a = "from .green.eggs import ham" # self.check_both(b, a) -# +# # def test_from_as(self): # b = "from green.eggs import ham as spam" # a = "from .green.eggs import ham as spam" # self.check_both(b, a) -# +# # def test_import(self): # b = "import foo" # a = "from . 
import foo" # self.check_both(b, a) -# +# # b = "import foo, bar" # a = "from . import foo, bar" # self.check_both(b, a) -# +# # b = "import foo, bar, x" # a = "from . import foo, bar, x" # self.check_both(b, a) -# +# # b = "import x, y, z" # a = "from . import x, y, z" # self.check_both(b, a) -# +# # def test_import_as(self): # b = "import foo as x" # a = "from . import foo as x" # self.check_both(b, a) -# +# # b = "import a as b, b as c, c as d" # a = "from . import a as b, b as c, c as d" # self.check_both(b, a) -# +# # def test_local_and_absolute(self): # self.always_exists = False # self.present_files = set(["foo.py", "__init__.py"]) -# +# # s = "import foo, bar" # self.warns_unchanged(s, "absolute and local imports together") -# +# # def test_dotted_import(self): # b = "import foo.bar" # a = "from . import foo.bar" # self.check_both(b, a) -# +# # def test_dotted_import_as(self): # b = "import foo.bar as bang" # a = "from . import foo.bar as bang" # self.check_both(b, a) -# +# # def test_prefix(self): # b = """ # # prefix @@ -3772,101 +3824,101 @@ def assert_runs_after(self, *names): # from . 
import foo.bar # """ # self.check_both(b, a) -# -# +# +# # class Test_set_literal(FixerTestCase): -# +# # fixer = "set_literal" -# +# # def test_basic(self): # b = """set([1, 2, 3])""" # a = """{1, 2, 3}""" # self.check(b, a) -# +# # b = """set((1, 2, 3))""" # a = """{1, 2, 3}""" # self.check(b, a) -# +# # b = """set((1,))""" # a = """{1}""" # self.check(b, a) -# +# # b = """set([1])""" # self.check(b, a) -# +# # b = """set((a, b))""" # a = """{a, b}""" # self.check(b, a) -# +# # b = """set([a, b])""" # self.check(b, a) -# +# # b = """set((a*234, f(args=23)))""" # a = """{a*234, f(args=23)}""" # self.check(b, a) -# +# # b = """set([a*23, f(23)])""" # a = """{a*23, f(23)}""" # self.check(b, a) -# +# # b = """set([a-234**23])""" # a = """{a-234**23}""" # self.check(b, a) -# +# # def test_listcomps(self): # b = """set([x for x in y])""" # a = """{x for x in y}""" # self.check(b, a) -# +# # b = """set([x for x in y if x == m])""" # a = """{x for x in y if x == m}""" # self.check(b, a) -# +# # b = """set([x for x in y for a in b])""" # a = """{x for x in y for a in b}""" # self.check(b, a) -# +# # b = """set([f(x) - 23 for x in y])""" # a = """{f(x) - 23 for x in y}""" # self.check(b, a) -# +# # def test_whitespace(self): # b = """set( [1, 2])""" # a = """{1, 2}""" # self.check(b, a) -# +# # b = """set([1 , 2])""" # a = """{1 , 2}""" # self.check(b, a) -# +# # b = """set([ 1 ])""" # a = """{ 1 }""" # self.check(b, a) -# +# # b = """set( [1] )""" # a = """{1}""" # self.check(b, a) -# +# # b = """set([ 1, 2 ])""" # a = """{ 1, 2 }""" # self.check(b, a) -# +# # b = """set([x for x in y ])""" # a = """{x for x in y }""" # self.check(b, a) -# +# # b = """set( # [1, 2] # ) # """ # a = """{1, 2}\n""" # self.check(b, a) -# +# # def test_comments(self): # b = """set((1, 2)) # Hi""" # a = """{1, 2} # Hi""" # self.check(b, a) -# +# # # This isn't optimal behavior, but the fixer is optional. 
# b = """ # # Foo @@ -3879,124 +3931,124 @@ def assert_runs_after(self, *names): # {1, 2} # """ # self.check(b, a) -# +# # def test_unchanged(self): # s = """set()""" # self.unchanged(s) -# +# # s = """set(a)""" # self.unchanged(s) -# +# # s = """set(a, b, c)""" # self.unchanged(s) -# +# # # Don't transform generators because they might have to be lazy. # s = """set(x for x in y)""" # self.unchanged(s) -# +# # s = """set(x for x in y if z)""" # self.unchanged(s) -# +# # s = """set(a*823-23**2 + f(23))""" # self.unchanged(s) -# -# +# +# # class Test_sys_exc(FixerTestCase): # fixer = "sys_exc" -# +# # def test_0(self): # b = "sys.exc_type" # a = "sys.exc_info()[0]" # self.check(b, a) -# +# # def test_1(self): # b = "sys.exc_value" # a = "sys.exc_info()[1]" # self.check(b, a) -# +# # def test_2(self): # b = "sys.exc_traceback" # a = "sys.exc_info()[2]" # self.check(b, a) -# +# # def test_3(self): # b = "sys.exc_type # Foo" # a = "sys.exc_info()[0] # Foo" # self.check(b, a) -# +# # def test_4(self): # b = "sys. exc_type" # a = "sys. 
exc_info()[0]" # self.check(b, a) -# +# # def test_5(self): # b = "sys .exc_type" # a = "sys .exc_info()[0]" # self.check(b, a) -# -# +# +# # class Test_paren(FixerTestCase): # fixer = "paren" -# +# # def test_0(self): # b = """[i for i in 1, 2 ]""" # a = """[i for i in (1, 2) ]""" # self.check(b, a) -# +# # def test_1(self): # b = """[i for i in 1, 2, ]""" # a = """[i for i in (1, 2,) ]""" # self.check(b, a) -# +# # def test_2(self): # b = """[i for i in 1, 2 ]""" # a = """[i for i in (1, 2) ]""" # self.check(b, a) -# +# # def test_3(self): # b = """[i for i in 1, 2 if i]""" # a = """[i for i in (1, 2) if i]""" # self.check(b, a) -# +# # def test_4(self): # b = """[i for i in 1, 2 ]""" # a = """[i for i in (1, 2) ]""" # self.check(b, a) -# +# # def test_5(self): # b = """(i for i in 1, 2)""" # a = """(i for i in (1, 2))""" # self.check(b, a) -# +# # def test_6(self): # b = """(i for i in 1 ,2 if i)""" # a = """(i for i in (1 ,2) if i)""" # self.check(b, a) -# +# # def test_unchanged_0(self): # s = """[i for i in (1, 2)]""" # self.unchanged(s) -# +# # def test_unchanged_1(self): # s = """[i for i in foo()]""" # self.unchanged(s) -# +# # def test_unchanged_2(self): # s = """[i for i in (1, 2) if nothing]""" # self.unchanged(s) -# +# # def test_unchanged_3(self): # s = """(i for i in (1, 2))""" # self.unchanged(s) -# +# # def test_unchanged_4(self): # s = """[i for i in m]""" # self.unchanged(s) -# +# # class Test_metaclass(FixerTestCase): -# +# # fixer = 'metaclass' -# +# # def test_unchanged(self): # self.unchanged("class X(): pass") # self.unchanged("class X(object): pass") @@ -4005,19 +4057,19 @@ def assert_runs_after(self, *names): # self.unchanged("class X(metaclass=Meta): pass") # self.unchanged("class X(b, arg=23, metclass=Meta): pass") # self.unchanged("class X(b, arg=23, metaclass=Meta, other=42): pass") -# +# # s = """ # class X: # def __metaclass__(self): pass # """ # self.unchanged(s) -# +# # s = """ # class X: # a[23] = 74 # """ # self.unchanged(s) -# 
+# # def test_comments(self): # b = """ # class X: @@ -4030,7 +4082,7 @@ def assert_runs_after(self, *names): # pass # """ # self.check(b, a) -# +# # b = """ # class X: # __metaclass__ = Meta @@ -4042,7 +4094,7 @@ def assert_runs_after(self, *names): # # Bedtime! # """ # self.check(b, a) -# +# # def test_meta(self): # # no-parent class, odd body # b = """ @@ -4055,13 +4107,13 @@ def assert_runs_after(self, *names): # pass # """ # self.check(b, a) -# +# # # one parent class, no body # b = """class X(object): __metaclass__ = Q""" # a = """class X(object, metaclass=Q): pass""" # self.check(b, a) -# -# +# +# # # one parent, simple body # b = """ # class X(object): @@ -4073,7 +4125,7 @@ def assert_runs_after(self, *names): # bar = 7 # """ # self.check(b, a) -# +# # b = """ # class X: # __metaclass__ = Meta; x = 4; g = 23 @@ -4083,7 +4135,7 @@ def assert_runs_after(self, *names): # x = 4; g = 23 # """ # self.check(b, a) -# +# # # one parent, simple body, __metaclass__ last # b = """ # class X(object): @@ -4095,7 +4147,7 @@ def assert_runs_after(self, *names): # bar = 7 # """ # self.check(b, a) -# +# # # redefining __metaclass__ # b = """ # class X(): @@ -4108,7 +4160,7 @@ def assert_runs_after(self, *names): # bar = 7 # """ # self.check(b, a) -# +# # # multiple inheritance, simple body # b = """ # class X(clsA, clsB): @@ -4120,12 +4172,12 @@ def assert_runs_after(self, *names): # bar = 7 # """ # self.check(b, a) -# +# # # keywords in the class statement # b = """class m(a, arg=23): __metaclass__ = Meta""" # a = """class m(a, arg=23, metaclass=Meta): pass""" # self.check(b, a) -# +# # b = """ # class X(expression(2 + 4)): # __metaclass__ = Meta @@ -4135,7 +4187,7 @@ def assert_runs_after(self, *names): # pass # """ # self.check(b, a) -# +# # b = """ # class X(expression(2 + 4), x**4): # __metaclass__ = Meta @@ -4145,7 +4197,7 @@ def assert_runs_after(self, *names): # pass # """ # self.check(b, a) -# +# # b = """ # class X: # __metaclass__ = Meta @@ -4156,44 +4208,44 @@ 
def assert_runs_after(self, *names): # save.py = 23 # """ # self.check(b, a) -# -# +# +# # class Test_getcwdu(FixerTestCase): -# +# # fixer = 'getcwdu' -# +# # def test_basic(self): # b = """os.getcwdu""" # a = """os.getcwd""" # self.check(b, a) -# +# # b = """os.getcwdu()""" # a = """os.getcwd()""" # self.check(b, a) -# +# # b = """meth = os.getcwdu""" # a = """meth = os.getcwd""" # self.check(b, a) -# +# # b = """os.getcwdu(args)""" # a = """os.getcwd(args)""" # self.check(b, a) -# +# # def test_comment(self): # b = """os.getcwdu() # Foo""" # a = """os.getcwd() # Foo""" # self.check(b, a) -# +# # def test_unchanged(self): # s = """os.getcwd()""" # self.unchanged(s) -# +# # s = """getcwdu()""" # self.unchanged(s) -# +# # s = """os.getcwdb()""" # self.unchanged(s) -# +# # def test_indentation(self): # b = """ # if 1: @@ -4204,124 +4256,124 @@ def assert_runs_after(self, *names): # os.getcwd() # """ # self.check(b, a) -# +# # def test_multilation(self): # b = """os .getcwdu()""" # a = """os .getcwd()""" # self.check(b, a) -# +# # b = """os. getcwdu""" # a = """os. getcwd""" # self.check(b, a) -# +# # b = """os.getcwdu ( )""" # a = """os.getcwd ( )""" # self.check(b, a) -# -# +# +# # class Test_operator(FixerTestCase): -# +# # fixer = "operator" -# +# # def test_operator_isCallable(self): # b = "operator.isCallable(x)" # a = "hasattr(x, '__call__')" # self.check(b, a) -# +# # def test_operator_sequenceIncludes(self): # b = "operator.sequenceIncludes(x, y)" # a = "operator.contains(x, y)" # self.check(b, a) -# +# # b = "operator .sequenceIncludes(x, y)" # a = "operator .contains(x, y)" # self.check(b, a) -# +# # b = "operator. sequenceIncludes(x, y)" # a = "operator. 
contains(x, y)" # self.check(b, a) -# +# # def test_operator_isSequenceType(self): # b = "operator.isSequenceType(x)" # a = "import collections\nisinstance(x, collections.Sequence)" # self.check(b, a) -# +# # def test_operator_isMappingType(self): # b = "operator.isMappingType(x)" # a = "import collections\nisinstance(x, collections.Mapping)" # self.check(b, a) -# +# # def test_operator_isNumberType(self): # b = "operator.isNumberType(x)" # a = "import numbers\nisinstance(x, numbers.Number)" # self.check(b, a) -# +# # def test_operator_repeat(self): # b = "operator.repeat(x, n)" # a = "operator.mul(x, n)" # self.check(b, a) -# +# # b = "operator .repeat(x, n)" # a = "operator .mul(x, n)" # self.check(b, a) -# +# # b = "operator. repeat(x, n)" # a = "operator. mul(x, n)" # self.check(b, a) -# +# # def test_operator_irepeat(self): # b = "operator.irepeat(x, n)" # a = "operator.imul(x, n)" # self.check(b, a) -# +# # b = "operator .irepeat(x, n)" # a = "operator .imul(x, n)" # self.check(b, a) -# +# # b = "operator. irepeat(x, n)" # a = "operator. imul(x, n)" # self.check(b, a) -# +# # def test_bare_isCallable(self): # s = "isCallable(x)" # t = "You should use 'hasattr(x, '__call__')' here." # self.warns_unchanged(s, t) -# +# # def test_bare_sequenceIncludes(self): # s = "sequenceIncludes(x, y)" # t = "You should use 'operator.contains(x, y)' here." # self.warns_unchanged(s, t) -# +# # def test_bare_operator_isSequenceType(self): # s = "isSequenceType(z)" # t = "You should use 'isinstance(z, collections.Sequence)' here." # self.warns_unchanged(s, t) -# +# # def test_bare_operator_isMappingType(self): # s = "isMappingType(x)" # t = "You should use 'isinstance(x, collections.Mapping)' here." # self.warns_unchanged(s, t) -# +# # def test_bare_operator_isNumberType(self): # s = "isNumberType(y)" # t = "You should use 'isinstance(y, numbers.Number)' here." 
# self.warns_unchanged(s, t) -# +# # def test_bare_operator_repeat(self): # s = "repeat(x, n)" # t = "You should use 'operator.mul(x, n)' here." # self.warns_unchanged(s, t) -# +# # def test_bare_operator_irepeat(self): # s = "irepeat(y, 187)" # t = "You should use 'operator.imul(y, 187)' here." # self.warns_unchanged(s, t) -# -# +# +# # class Test_exitfunc(FixerTestCase): -# +# # fixer = "exitfunc" -# +# # def test_simple(self): # b = """ # import sys @@ -4333,7 +4385,7 @@ def assert_runs_after(self, *names): # atexit.register(my_atexit) # """ # self.check(b, a) -# +# # def test_names_import(self): # b = """ # import sys, crumbs @@ -4344,7 +4396,7 @@ def assert_runs_after(self, *names): # atexit.register(my_func) # """ # self.check(b, a) -# +# # def test_complex_expression(self): # b = """ # import sys @@ -4356,7 +4408,7 @@ def assert_runs_after(self, *names): # atexit.register(do(d)/a()+complex(f=23, g=23)*expression) # """ # self.check(b, a) -# +# # def test_comments(self): # b = """ # import sys # Foo @@ -4368,7 +4420,7 @@ def assert_runs_after(self, *names): # atexit.register(f) # Blah # """ # self.check(b, a) -# +# # b = """ # import apples, sys, crumbs, larry # Pleasant comments # sys.exitfunc = func @@ -4378,7 +4430,7 @@ def assert_runs_after(self, *names): # atexit.register(func) # """ # self.check(b, a) -# +# # def test_in_a_function(self): # b = """ # import sys @@ -4392,15 +4444,15 @@ def assert_runs_after(self, *names): # atexit.register(func) # """ # self.check(b, a) -# +# # def test_no_sys_import(self): # b = """sys.exitfunc = f""" # a = """atexit.register(f)""" # msg = ("Can't find sys import; Please add an atexit import at the " # "top of your file.") # self.warns(b, a, msg) -# -# +# +# # def test_unchanged(self): # s = """f(sys.exitfunc)""" # self.unchanged(s) diff --git a/tests/test_future/test_list.py b/tests/test_future/test_list.py new file mode 100644 index 00000000..16fb84c5 --- /dev/null +++ b/tests/test_future/test_list.py @@ -0,0 +1,192 
@@ +# -*- coding: utf-8 -*- +""" +Tests for the backported class:`list` class. +""" + +from __future__ import absolute_import, unicode_literals, print_function +from future.builtins import * +from future import utils +from future.tests.base import unittest, expectedFailurePY2 + + +class TestList(unittest.TestCase): + def test_isinstance_list(self): + self.assertTrue(isinstance([], list)) + self.assertEqual([1, 2, 3], list([1, 2, 3])) + + def test_isinstance_list_subclass(self): + """ + Issue #89 + """ + value = list([1, 2, 3]) + class Magic(list): + pass + self.assertTrue(isinstance(value, list)) + self.assertFalse(isinstance(value, Magic)) + + def test_list_empty(self): + """ + list() -> [] + """ + self.assertEqual(list(), []) + + def test_list_clear(self): + l = list() + l.append(1) + l.clear() + self.assertEqual(len(l), 0) + l.extend([2, 3]) + l.clear() + self.assertEqual(len(l), 0) + + def test_list_list(self): + self.assertEqual(list(list()), []) + self.assertTrue(isinstance(list(list()), list)) + + def test_list_list2(self): + """ + Issue #50 + """ + l = list([1, 2, 3]) + l2 = list(l) + self.assertEqual(len(l2), 3) + self.assertEqual(l2, [1, 2, 3]) + + def test_list_equal(self): + l = [1, 3, 5] + self.assertEqual(list(l), l) + + def test_list_getitem(self): + l = list('ABCD') + self.assertEqual(l, ['A', 'B', 'C', 'D']) + self.assertEqual(l[0], 'A') + self.assertEqual(l[-1], 'D') + self.assertEqual(l[0:1], ['A']) + self.assertEqual(l[0:2], ['A', 'B']) + self.assertEqual(''.join(l[:]), 'ABCD') + + def test_list_setitem(self): + l = list('ABCD') + l[1] = b'B' + self.assertEqual(l, ['A', b'B', 'C', 'D']) + + def test_list_iteration(self): + l = list('ABCD') + for item in l: + self.assertTrue(isinstance(item, str)) + + def test_list_plus_list(self): + l1 = list('ABCD') + l2 = ['E', 'F', 'G', 'H'] + self.assertEqual(l1 + l2, ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']) + self.assertEqual(type(l1 + l2), list) + self.assertEqual(l2 + l1, ['E', 'F', 'G', 'H', 'A', 'B', 
'C', 'D']) + self.assertEqual(l2 + l1, list('EFGHABCD')) + self.assertEqual(type(l2 + l1), list) + self.assertTrue(isinstance(l2 + l1, list)) + + def test_list_contains_something(self): + l = list('ABCD') + self.assertTrue('A' in l) + self.assertFalse(['A', 'B'] in l) + + def test_list_index(self): + l = list('ABCD') + self.assertEqual(l.index('B'), 1) + with self.assertRaises(ValueError): + l.index('') + + def test_copy(self): + l = list('ABCD') + l2 = l.copy() + self.assertEqual(l, l2) + l2.pop() + self.assertNotEqual(l, l2) + + # @unittest.skip('Fails on Python <= 2.7.6 due to list subclass slicing bug') + def test_slice(self): + """ + Do slices return newlist objects? + """ + l = list(u'abcd') + self.assertEqual(l[:2], [u'a', u'b']) + # Fails due to bug on Py2: + # self.assertEqual(type(l[:2]), list) + self.assertEqual(l[-2:], [u'c', u'd']) + # Fails due to bug on Py2: + # self.assertEqual(type(l[-2:]), list) + + # @unittest.skip('Fails on Python <= 2.7.6 due to list subclass slicing bug') + def test_subclassing(self): + """ + Can newlist be subclassed and do list methods then return instances of + the same class? (This is the Py3 behaviour). + """ + class SubClass(list): + pass + l = SubClass(u'abcd') + l2 = SubClass(str(u'abcd')) + self.assertEqual(type(l), SubClass) + self.assertTrue(isinstance(l, list)) + # Fails on Py2.7 but passes on Py3.3: + # self.assertEqual(type(l + l), list) + self.assertTrue(isinstance(l[0], str)) + self.assertEqual(type(l2[0]), str) + # This is not true on Py3.3: + # self.assertEqual(type(l[:2]), SubClass) + self.assertTrue(isinstance(l[:2], list)) + + def test_subclassing_2(self): + """ + Tests __new__ method in subclasses. 
Fails in versions <= 0.11.4 + """ + class SubClass(list): + def __new__(cls, *args, **kwargs): + self = list.__new__(cls, *args, **kwargs) + assert type(self) == SubClass + return self + l = SubClass(u'abcd') + self.assertEqual(type(l), SubClass) + self.assertEqual(l, [u'a', u'b', u'c', u'd']) + + def test_bool(self): + l = list([]) + l2 = list([1, 3, 5]) + self.assertFalse(bool(l)) + self.assertTrue(bool(l2)) + l2.clear() + self.assertFalse(bool(l2)) + + @expectedFailurePY2 + def test_multiple_inheritance(self): + """ + Issue #96 (for newdict instead of newobject) + """ + if utils.PY2: + from collections import Container + else: + from collections.abc import Container + + class Base(list): + pass + + class Foo(Base, Container): + def __contains__(self, item): + return False + + @expectedFailurePY2 + def test_with_metaclass_and_list(self): + """ + Issue #91 (for newdict instead of newobject) + """ + from future.utils import with_metaclass + + class MetaClass(type): + pass + + class TestClass(with_metaclass(MetaClass, list)): + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_future/test_magicsuper.py b/tests/test_future/test_magicsuper.py new file mode 100644 index 00000000..e5bbe093 --- /dev/null +++ b/tests/test_future/test_magicsuper.py @@ -0,0 +1,135 @@ +""" +Tests for the super() function. 
+ +Based on Ryan Kelly's magicsuper.tests +""" + +from __future__ import absolute_import, division, print_function, unicode_literals +import future.builtins.newsuper +from future.builtins import super +from future.tests.base import unittest +from future import utils + + +class TestMagicSuper(unittest.TestCase): + + def test_basic_diamond(self): + class Base(object): + def calc(self,value): + return 2 * value + class Sub1(Base): + def calc(self,value): + return 7 + super().calc(value) + class Sub2(Base): + def calc(self,value): + return super().calc(value) - 1 + class Diamond(Sub1,Sub2): + def calc(self,value): + return 3 * super().calc(value) + b = Base() + s1 = Sub1() + s2 = Sub2() + d = Diamond() + for x in range(10): + self.assertEqual(b.calc(x),2*x) + self.assertEqual(s1.calc(x),7+(2*x)) + self.assertEqual(s2.calc(x),(2*x)-1) + self.assertEqual(d.calc(x),3*(7+((2*x)-1))) + + def test_with_unrelated_methods(self): + class Base(object): + def hello(self): + return "world" + class Sub(Base): + def hello(self): + return "hello " + super().hello() + def other(self): + pass + class SubSub(Sub): + def other(self): + return super().other() + ss = SubSub() + self.assertEqual(ss.hello(),"hello world") + + @unittest.skipIf(utils.PY3, "this test isn't relevant on Py3") + def test_fails_for_oldstyle_class(self): + class OldStyle: + def testme(self): + return super().testme() + o = OldStyle() + self.assertRaises(RuntimeError,o.testme) + + def test_fails_for_raw_functions(self): + def not_a_method(): + super().not_a_method() + self.assertRaises(RuntimeError,not_a_method) + def not_a_method(self): + super().not_a_method() + if utils.PY2: + self.assertRaises(RuntimeError,not_a_method,self) + else: + self.assertRaises(AttributeError,not_a_method,self) + + def assertSuperEquals(self,sobj1,sobj2): + assert sobj1.__self__ is sobj2.__self__ + assert sobj1.__self_class__ is sobj2.__self_class__ + assert sobj1.__thisclass__ is sobj2.__thisclass__ + + def 
test_call_with_args_does_nothing(self): + if utils.PY2: + from __builtin__ import super as builtin_super + else: + from builtins import super as builtin_super + class Base(object): + def calc(self,value): + return 2 * value + class Sub1(Base): + def calc(self,value): + return 7 + super().calc(value) + class Sub2(Base): + def calc(self,value): + return super().calc(value) - 1 + class Diamond(Sub1,Sub2): + def calc(self,value): + return 3 * super().calc(value) + for cls in (Base,Sub1,Sub2,Diamond,): + obj = cls() + self.assertSuperEquals(builtin_super(cls), super(cls)) + self.assertSuperEquals(builtin_super(cls,obj), super(cls,obj)) + + @unittest.skipIf(utils.PY3, "this test isn't relevant for Py3's super()") + def test_superm(self): + class Base(object): + def getit(self): + return 2 + class Sub(Base): + def getit(self): + return 10 * future.builtins.newsuper.superm() + s = Sub() + self.assertEqual(s.getit(),20) + + def test_use_inside_dunder_new(self): + class Terminal(str): + def __new__(cls, value, token_type): + self = super().__new__(cls, value) + self.token_type = token_type + return self + DOT = Terminal(".", "dit") + self.assertTrue(isinstance(DOT, str)) + self.assertTrue(isinstance(DOT, Terminal)) + + def test_use_inside_classmethod(self): + class Base(object): + @classmethod + def getit(cls): + return 42 + class Singleton(Base): + @classmethod + def getit(cls): + print(super()) + return super().getit() + 1 + self.assertEqual(Singleton.getit(), 43) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_future/test_object.py b/tests/test_future/test_object.py new file mode 100644 index 00000000..4f99cb5a --- /dev/null +++ b/tests/test_future/test_object.py @@ -0,0 +1,289 @@ +""" +Tests to make sure the newobject object (which defines Python 2-compatible +``__unicode__`` and ``next`` methods) is working. 
+""" + +from __future__ import absolute_import, division +from future import utils +from future.builtins import object, str, next, int, super +from future.utils import implements_iterator, python_2_unicode_compatible +from future.tests.base import unittest, expectedFailurePY2 + + +class TestNewObject(unittest.TestCase): + def test_object_implements_py2_unicode_method(self): + my_unicode_str = u'Unicode string: \u5b54\u5b50' + class A(object): + def __str__(self): + return my_unicode_str + a = A() + self.assertEqual(len(str(a)), 18) + if utils.PY2: + self.assertTrue(hasattr(a, '__unicode__')) + else: + self.assertFalse(hasattr(a, '__unicode__')) + self.assertEqual(str(a), my_unicode_str) + self.assertTrue(isinstance(str(a).encode('utf-8'), bytes)) + if utils.PY2: + self.assertTrue(type(unicode(a)) == unicode) + self.assertEqual(unicode(a), my_unicode_str) + + # Manual equivalent on Py2 without the decorator: + if not utils.PY3: + class B(object): + def __unicode__(self): + return u'Unicode string: \u5b54\u5b50' + def __str__(self): + return unicode(self).encode('utf-8') + b = B() + assert str(a) == str(b) + + def test_implements_py2_iterator(self): + + class Upper(object): + def __init__(self, iterable): + self._iter = iter(iterable) + def __next__(self): # note the Py3 interface + return next(self._iter).upper() + def __iter__(self): + return self + + self.assertEqual(list(Upper('hello')), list('HELLO')) + + # Try combining it with the next() function: + + class MyIter(object): + def __next__(self): + return 'Next!' + def __iter__(self): + return self + + itr = MyIter() + self.assertEqual(next(itr), 'Next!') + + itr2 = MyIter() + for i, item in enumerate(itr2): + if i >= 10: + break + self.assertEqual(item, 'Next!') + + def test_implements_py2_nonzero(self): + + class EvenIsTrue(object): + """ + An integer that evaluates to True if even. 
+ """ + def __init__(self, my_int): + self.my_int = my_int + def __bool__(self): + return self.my_int % 2 == 0 + def __add__(self, other): + return type(self)(self.my_int + other) + + k = EvenIsTrue(5) + self.assertFalse(k) + self.assertFalse(bool(k)) + self.assertTrue(k + 1) + self.assertTrue(bool(k + 1)) + self.assertFalse(k + 2) + + + def test_int_implements_py2_nonzero(self): + """ + Tests whether the newint object provides a __nonzero__ method that + maps to __bool__ in case the user redefines __bool__ in a subclass of + newint. + """ + + class EvenIsTrue(int): + """ + An integer that evaluates to True if even. + """ + def __bool__(self): + return self % 2 == 0 + def __add__(self, other): + val = super().__add__(other) + return type(self)(val) + + k = EvenIsTrue(5) + self.assertFalse(k) + self.assertFalse(bool(k)) + self.assertTrue(k + 1) + self.assertTrue(bool(k + 1)) + self.assertFalse(k + 2) + + def test_non_iterator(self): + """ + The default behaviour of next(o) for a newobject o should be to raise a + TypeError, as with the corresponding builtin object. + """ + o = object() + with self.assertRaises(TypeError): + next(o) + + def test_bool_empty_object(self): + """ + The default result of bool(newobject()) should be True, as with builtin + objects. 
+ """ + o = object() + self.assertTrue(bool(o)) + + class MyClass(object): + pass + + obj = MyClass() + self.assertTrue(bool(obj)) + + def test_isinstance_object_subclass(self): + """ + This was failing before + """ + class A(object): + pass + a = A() + + class B(object): + pass + b = B() + + self.assertFalse(isinstance(a, B)) + self.assertFalse(isinstance(b, A)) + self.assertTrue(isinstance(a, A)) + self.assertTrue(isinstance(b, B)) + + class C(A): + pass + c = C() + + self.assertTrue(isinstance(c, A)) + self.assertFalse(isinstance(c, B)) + self.assertFalse(isinstance(a, C)) + self.assertFalse(isinstance(b, C)) + self.assertTrue(isinstance(c, C)) + + @expectedFailurePY2 + def test_types_isinstance_newobject(self): + a = list() + b = dict() + c = set() + self.assertTrue(isinstance(a, object)) + self.assertTrue(isinstance(b, object)) + self.assertTrue(isinstance(c, object)) + + # Old-style class instances on Py2 should still report as an instance + # of object as usual on Py2: + class D: + pass + d = D() + self.assertTrue(isinstance(d, object)) + + e = object() + self.assertTrue(isinstance(e, object)) + + class F(object): + pass + f = F() + self.assertTrue(isinstance(f, object)) + + class G(F): + pass + g = G() + self.assertTrue(isinstance(g, object)) + + class H(): + pass + h = H() + self.assertTrue(isinstance(h, object)) + + def test_long_special_method(self): + class A(object): + def __int__(self): + return 0 + a = A() + self.assertEqual(int(a), 0) + if utils.PY2: + self.assertEqual(long(a), 0) + + def test_multiple_inheritance(self): + """ + Issue #96 + """ + if utils.PY2: + from collections import Container + else: + from collections.abc import Container + + class Base(object): + pass + + class Foo(Base, Container): + def __contains__(self, item): + return False + + def test_with_metaclass_and_object(self): + """ + Issue #91 + """ + from future.utils import with_metaclass + + class MetaClass(type): + pass + + class TestClass(with_metaclass(MetaClass, object)): 
+ pass + + def test_bool(self): + """ + Issue #211 + """ + from builtins import object + + class ResultSet(object): + def __len__(self): + return 0 + + self.assertTrue(bool(ResultSet()) is False) + + class ResultSet(object): + def __len__(self): + return 2 + + self.assertTrue(bool(ResultSet()) is True) + + def test_bool2(self): + """ + If __bool__ is defined, the presence or absence of __len__ should + be irrelevant. + """ + from builtins import object + + class TrueThing(object): + def __bool__(self): + return True + def __len__(self): + raise RuntimeError('__len__ should not be called') + + self.assertTrue(bool(TrueThing())) + + class FalseThing(object): + def __bool__(self): + return False + def __len__(self): + raise RuntimeError('__len__ should not be called') + + self.assertFalse(bool(FalseThing())) + + def test_cannot_assign_new_attributes_to_object(self): + """ + New attributes cannot be assigned to object() instances in Python. + The same should apply to newobject. + """ + from builtins import object + + with self.assertRaises(AttributeError): + object().arbitrary_attribute_name = True + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_future/test_pasteurize.py b/tests/test_future/test_pasteurize.py new file mode 100644 index 00000000..2b6e2ee6 --- /dev/null +++ b/tests/test_future/test_pasteurize.py @@ -0,0 +1,256 @@ +# -*- coding: utf-8 -*- +""" +This module contains snippets of Python 3 code (invalid Python 2) and +tests for whether they can be passed to ``pasteurize`` and +immediately run under both Python 2 and Python 3. +""" + +from __future__ import print_function, absolute_import + +import pprint +from subprocess import Popen, PIPE +import tempfile +import os + +from future.tests.base import CodeHandler, unittest, skip26 + + +class TestPasteurize(CodeHandler): + """ + After running ``pasteurize``, these Python 3 code snippets should run + on both Py3 and Py2. 
+ """ + + def setUp(self): + # For tests that need a text file: + _, self.textfilename = tempfile.mkstemp(text=True) + super(TestPasteurize, self).setUp() + + def tearDown(self): + os.unlink(self.textfilename) + + @skip26 # Python 2.6's lib2to3 causes the "from builtins import + # range" line to be stuck at the bottom of the module! + def test_range_slice(self): + """ + After running ``pasteurize``, this Python 3 code should run + quickly on both Py3 and Py2 without a MemoryError + """ + code = ''' + for i in range(10**8)[:10]: + pass + ''' + self.unchanged(code, from3=True) + + def test_print(self): + """ + This Python 3-only code is a SyntaxError on Py2 without the + print_function import from __future__. + """ + code = ''' + import sys + print('Hello', file=sys.stderr) + ''' + self.unchanged(code, from3=True) + + def test_division(self): + """ + True division should not be screwed up by conversion from 3 to both + """ + code = ''' + x = 3 / 2 + assert x == 1.5 + ''' + self.unchanged(code, from3=True) + + # TODO: write / fix the raise_ fixer so that it uses the raise_ function + @unittest.expectedFailure + def test_exception_indentation(self): + """ + As of v0.11.2, pasteurize broke the indentation of ``raise`` statements + using with_traceback. Test for this. + """ + before = ''' + import sys + if True: + try: + 'string' + 1 + except TypeError: + ty, va, tb = sys.exc_info() + raise TypeError("can't do that!").with_traceback(tb) + ''' + after = ''' + import sys + from future.utils import raise_with_traceback + if True: + try: + 'string' + 1 + except TypeError: + ty, va, tb = sys.exc_info() + raise_with_traceback(TypeError("can't do that!"), tb) + ''' + self.convert_check(before, after, from3=True) + + # TODO: fix and test this test + @unittest.expectedFailure + def test_urllib_request(self): + """ + Example Python 3 code using the new urllib.request module. + + Does the ``pasteurize`` script handle this? 
+ """ + before = """ + import pprint + import urllib.request + + URL = 'http://pypi.python.org/pypi/{}/json' + package = 'future' + + r = urllib.request.urlopen(URL.format(package)) + pprint.pprint(r.read()) + """ + after = """ + import pprint + import future.standard_library.urllib.request as urllib_request + + URL = 'http://pypi.python.org/pypi/{}/json' + package = 'future' + + r = urllib_request.urlopen(URL.format(package)) + pprint.pprint(r.read()) + """ + + self.convert_check(before, after, from3=True) + + def test_urllib_refactor2(self): + before = """ + import urllib.request, urllib.parse + + f = urllib.request.urlopen(url, timeout=15) + filename = urllib.parse.urlparse(url)[2].split('/')[-1] + """ + + after = """ + from future.standard_library.urllib import request as urllib_request + from future.standard_library.urllib import parse as urllib_parse + + f = urllib_request.urlopen(url, timeout=15) + filename = urllib_parse.urlparse(url)[2].split('/')[-1] + """ + + def test_correct_exit_status(self): + """ + Issue #119: futurize and pasteurize were not exiting with the correct + status code. This is because the status code returned from + libfuturize.main.main() etc. was a ``newint``, which sys.exit() always + translates into 1! + """ + from libpasteurize.main import main + # Try pasteurizing this test script: + retcode = main([self.textfilename]) + self.assertTrue(isinstance(retcode, int)) # i.e. Py2 builtin int + + +class TestFuturizeAnnotations(CodeHandler): + @unittest.expectedFailure + def test_return_annotations_alone(self): + before = "def foo() -> 'bar': pass" + after = """ + def foo(): pass + foo.__annotations__ = {'return': 'bar'} + """ + self.convert_check(before, after, from3=True) + + b = """ + def foo() -> "bar": + print "baz" + print "what's next, again?" + """ + a = """ + def foo(): + print "baz" + print "what's next, again?" 
+ """ + self.convert_check(b, a, from3=True) + + @unittest.expectedFailure + def test_single_param_annotations(self): + b = "def foo(bar:'baz'): pass" + a = """ + def foo(bar): pass + foo.__annotations__ = {'bar': 'baz'} + """ + self.convert_check(b, a, from3=True) + + b = """ + def foo(bar:"baz"="spam"): + print("what's next, again?") + print("whatever.") + """ + a = """ + def foo(bar="spam"): + print("what's next, again?") + print("whatever.") + foo.__annotations__ = {'bar': 'baz'} + """ + self.convert_check(b, a, from3=True) + + def test_multiple_param_annotations(self): + b = "def foo(bar:'spam'=False, baz:'eggs'=True, ham:False='spaghetti'): pass" + a = "def foo(bar=False, baz=True, ham='spaghetti'): pass" + self.convert_check(b, a, from3=True) + + b = """ + def foo(bar:"spam"=False, baz:"eggs"=True, ham:False="spam"): + print("this is filler, just doing a suite") + print("suites require multiple lines.") + """ + a = """ + def foo(bar=False, baz=True, ham="spam"): + print("this is filler, just doing a suite") + print("suites require multiple lines.") + """ + self.convert_check(b, a, from3=True) + + def test_mixed_annotations(self): + b = "def foo(bar=False, baz:'eggs'=True, ham:False='spaghetti') -> 'zombies': pass" + a = "def foo(bar=False, baz=True, ham='spaghetti'): pass" + self.convert_check(b, a, from3=True) + + b = """ + def foo(bar:"spam"=False, baz=True, ham:False="spam") -> 'air': + print("this is filler, just doing a suite") + print("suites require multiple lines.") + """ + a = """ + def foo(bar=False, baz=True, ham="spam"): + print("this is filler, just doing a suite") + print("suites require multiple lines.") + """ + self.convert_check(b, a, from3=True) + + b = "def foo(bar) -> 'brains': pass" + a = "def foo(bar): pass" + self.convert_check(b, a, from3=True) + + def test_functions_unchanged(self): + s = "def foo(): pass" + self.unchanged(s, from3=True) + + s = """ + def foo(): + pass + pass + """ + self.unchanged(s, from3=True) + + s = """ + def 
foo(bar='baz'): + pass + pass + """ + self.unchanged(s, from3=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_future/test_py2_str_literals_to_bytes.py b/tests/test_future/test_py2_str_literals_to_bytes.py new file mode 100644 index 00000000..47866c8c --- /dev/null +++ b/tests/test_future/test_py2_str_literals_to_bytes.py @@ -0,0 +1 @@ +a = '123' diff --git a/tests/test_future/test_range.py b/tests/test_future/test_range.py new file mode 100644 index 00000000..dba15228 --- /dev/null +++ b/tests/test_future/test_range.py @@ -0,0 +1,216 @@ +# -*- coding: utf-8 -*- +""" +Tests for the backported class:`range` class. +""" + +from future.builtins import range +from future.tests.base import unittest + +from operator import attrgetter + +from future.utils import PY2 + +if PY2: + from collections import Iterator, Sequence +else: + from collections.abc import Iterator, Sequence + + +class RangeTests(unittest.TestCase): + def test_range(self): + self.assertTrue(isinstance(range(0), Sequence)) + self.assertTrue(isinstance(reversed(range(0)), Iterator)) + + def test_bool_range(self): + self.assertFalse(range(0)) + self.assertTrue(range(1)) + self.assertFalse(range(1, 1)) + self.assertFalse(range(5, 2)) + + def test_equality_range(self): + self.assertEqual(range(7), range(7)) + self.assertEqual(range(0), range(1, 1)) + self.assertEqual(range(0, 10, 3), range(0, 11, 3)) + + def test_contains(self): + self.assertIn(1, range(2)) + self.assertNotIn(10, range(2)) + self.assertNotIn(None, range(2)) + self.assertNotIn("", range(2)) + + # Use strict equality of attributes when slicing to catch subtle differences + def assertRangesEqual(self, r1, r2): + by_attrs = attrgetter('start', 'stop', 'step') + self.assertEqual(by_attrs(r1), by_attrs(r2)) + + def test_slice_empty_range(self): + self.assertRangesEqual(range(0)[:], range(0)) + self.assertRangesEqual(range(0)[::-1], range(-1, -1, -1)) + + def test_slice_overflow_range(self): + r = range(8) + 
self.assertRangesEqual(r[2:200], range(2, 8)) + self.assertRangesEqual(r[-200:-2], range(0, 6)) + + def test_slice_range(self): + r = range(-8, 8) + self.assertRangesEqual(r[:], range(-8, 8)) + self.assertRangesEqual(r[:2], range(-8, -6)) + self.assertRangesEqual(r[:-2], range(-8, 6)) + self.assertRangesEqual(r[2:], range(-6, 8)) + self.assertRangesEqual(r[-2:], range(6, 8)) + self.assertRangesEqual(r[2:-2], range(-6, 6)) + + def test_rev_slice_range(self): + r = range(-8, 8) + self.assertRangesEqual(r[::-1], range(7, -9, -1)) + self.assertRangesEqual(r[:2:-1], range(7, -6, -1)) + self.assertRangesEqual(r[:-2:-1], range(7, 6, -1)) + self.assertRangesEqual(r[2::-1], range(-6, -9, -1)) + self.assertRangesEqual(r[-2::-1], range(6, -9, -1)) + self.assertRangesEqual(r[-2:2:-1], range(6, -6, -1)) + + def test_slice_rev_range(self): + r = range(8, -8, -1) + self.assertRangesEqual(r[:], range(8, -8, -1)) + self.assertRangesEqual(r[:2], range(8, 6, -1)) + self.assertRangesEqual(r[:-2], range(8, -6, -1)) + self.assertRangesEqual(r[2:], range(6, -8, -1)) + self.assertRangesEqual(r[-2:], range(-6, -8, -1)) + self.assertRangesEqual(r[2:-2], range(6, -6, -1)) + + def test_rev_slice_rev_range(self): + r = range(8, -8, -1) + self.assertRangesEqual(r[::-1], range(-7, 9)) + self.assertRangesEqual(r[:2:-1], range(-7, 6)) + self.assertRangesEqual(r[:-2:-1], range(-7, -6)) + self.assertRangesEqual(r[2::-1], range(6, 9)) + self.assertRangesEqual(r[-2::-1], range(-6, 9)) + self.assertRangesEqual(r[-2:2:-1], range(-6, 6)) + + def test_stepped_slice_range(self): + r = range(-8, 8) + self.assertRangesEqual(r[::2], range(-8, 8, 2)) + self.assertRangesEqual(r[:2:2], range(-8, -6, 2)) + self.assertRangesEqual(r[:-2:2], range(-8, 6, 2)) + self.assertRangesEqual(r[2::2], range(-6, 8, 2)) + self.assertRangesEqual(r[-2::2], range(6, 8, 2)) + self.assertRangesEqual(r[2:-2:2], range(-6, 6, 2)) + + def test_rev_stepped_slice_range(self): + r = range(-8, 8) + self.assertRangesEqual(r[::-2], range(7, 
-9, -2)) + self.assertRangesEqual(r[:2:-2], range(7, -6, -2)) + self.assertRangesEqual(r[:-2:-2], range(7, 6, -2)) + self.assertRangesEqual(r[2::-2], range(-6, -9, -2)) + self.assertRangesEqual(r[-2::-2], range(6, -9, -2)) + self.assertRangesEqual(r[-2:2:-2], range(6, -6, -2)) + + def test_stepped_slice_rev_range(self): + r = range(8, -8, -1) + self.assertRangesEqual(r[::2], range(8, -8, -2)) + self.assertRangesEqual(r[:2:2], range(8, 6, -2)) + self.assertRangesEqual(r[:-2:2], range(8, -6, -2)) + self.assertRangesEqual(r[2::2], range(6, -8, -2)) + self.assertRangesEqual(r[-2::2], range(-6, -8, -2)) + self.assertRangesEqual(r[2:-2:2], range(6, -6, -2)) + + def test_rev_stepped_slice_rev_range(self): + r = range(8, -8, -1) + self.assertRangesEqual(r[::-2], range(-7, 9, 2)) + self.assertRangesEqual(r[:2:-2], range(-7, 6, 2)) + self.assertRangesEqual(r[:-2:-2], range(-7, -6, 2)) + self.assertRangesEqual(r[2::-2], range(6, 9, 2)) + self.assertRangesEqual(r[-2::-2], range(-6, 9, 2)) + self.assertRangesEqual(r[-2:2:-2], range(-6, 6, 2)) + + def test_slice_stepped_range(self): + r = range(-8, 8, 2) + self.assertRangesEqual(r[:], range(-8, 8, 2)) + self.assertRangesEqual(r[:2], range(-8, -4, 2)) + self.assertRangesEqual(r[:-2], range(-8, 4, 2)) + self.assertRangesEqual(r[2:], range(-4, 8, 2)) + self.assertRangesEqual(r[-2:], range(4, 8, 2)) + self.assertRangesEqual(r[2:-2], range(-4, 4, 2)) + + def test_rev_slice_stepped_range(self): + r = range(-8, 8, 2) + self.assertRangesEqual(r[::-1], range(6, -10, -2)) + self.assertRangesEqual(r[:2:-1], range(6, -4, -2)) + self.assertRangesEqual(r[:-2:-1], range(6, 4, -2)) + self.assertRangesEqual(r[2::-1], range(-4, -10, -2)) + self.assertRangesEqual(r[-2::-1], range(4, -10, -2)) + self.assertRangesEqual(r[-2:2:-1], range(4, -4, -2)) + + def test_slice_rev_stepped_range(self): + r = range(8, -8, -2) + self.assertRangesEqual(r[:], range(8, -8, -2)) + self.assertRangesEqual(r[:2], range(8, 4, -2)) + self.assertRangesEqual(r[:-2], 
range(8, -4, -2)) + self.assertRangesEqual(r[2:], range(4, -8, -2)) + self.assertRangesEqual(r[-2:], range(-4, -8, -2)) + self.assertRangesEqual(r[2:-2], range(4, -4, -2)) + + def test_rev_slice_rev_stepped_range(self): + r = range(8, -8, -2) + self.assertRangesEqual(r[::-1], range(-6, 10, 2)) + self.assertRangesEqual(r[:2:-1], range(-6, 4, 2)) + self.assertRangesEqual(r[:-2:-1], range(-6, -4, 2)) + self.assertRangesEqual(r[2::-1], range(4, 10, 2)) + self.assertRangesEqual(r[-2::-1], range(-4, 10, 2)) + self.assertRangesEqual(r[-2:2:-1], range(-4, 4, 2)) + + def test_stepped_slice_stepped_range(self): + r = range(-8, 8, 2) + self.assertRangesEqual(r[::2], range(-8, 8, 4)) + self.assertRangesEqual(r[:2:2], range(-8, -4, 4)) + self.assertRangesEqual(r[:-2:2], range(-8, 4, 4)) + self.assertRangesEqual(r[2::2], range(-4, 8, 4)) + self.assertRangesEqual(r[-2::2], range(4, 8, 4)) + self.assertRangesEqual(r[2:-2:2], range(-4, 4, 4)) + + def test_rev_stepped_slice_stepped_range(self): + r = range(-8, 8, 2) + self.assertRangesEqual(r[::-2], range(6, -10, -4)) + self.assertRangesEqual(r[:2:-2], range(6, -4, -4)) + self.assertRangesEqual(r[:-2:-2], range(6, 4, -4)) + self.assertRangesEqual(r[2::-2], range(-4, -10, -4)) + self.assertRangesEqual(r[-2::-2], range(4, -10, -4)) + self.assertRangesEqual(r[-2:2:-2], range(4, -4, -4)) + + def test_stepped_slice_rev_stepped_range(self): + r = range(8, -8, -2) + self.assertRangesEqual(r[::2], range(8, -8, -4)) + self.assertRangesEqual(r[:2:2], range(8, 4, -4)) + self.assertRangesEqual(r[:-2:2], range(8, -4, -4)) + self.assertRangesEqual(r[2::2], range(4, -8, -4)) + self.assertRangesEqual(r[-2::2], range(-4, -8, -4)) + self.assertRangesEqual(r[2:-2:2], range(4, -4, -4)) + + def test_rev_stepped_slice_rev_stepped_range(self): + r = range(8, -8, -2) + self.assertRangesEqual(r[::-2], range(-6, 10, 4)) + self.assertRangesEqual(r[:2:-2], range(-6, 4, 4)) + self.assertRangesEqual(r[:-2:-2], range(-6, -4, 4)) + self.assertRangesEqual(r[2::-2], 
range(4, 10, 4)) + self.assertRangesEqual(r[-2::-2], range(-4, 10, 4)) + self.assertRangesEqual(r[-2:2:-2], range(-4, 4, 4)) + + def test_slice_zero_step(self): + msg = '^slice step cannot be zero$' + with self.assertRaisesRegex(ValueError, msg): + range(8)[::0] + + def test_properties(self): + # Exception string differs between PY2/3 + r = range(0) + with self.assertRaises(AttributeError): + r.start = 0 + with self.assertRaises(AttributeError): + r.stop = 0 + with self.assertRaises(AttributeError): + r.step = 0 + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_future/test_requests.py b/tests/test_future/test_requests.py new file mode 100644 index 00000000..e362a1fd --- /dev/null +++ b/tests/test_future/test_requests.py @@ -0,0 +1,107 @@ +""" +Tests for whether the standard library hooks in ``future`` are compatible with +the ``requests`` package. +""" + +from __future__ import absolute_import, unicode_literals, print_function +from future import standard_library +from future.tests.base import unittest, CodeHandler +import textwrap +import sys +import os +import io + + +# Don't import requests first. This avoids the problem we want to expose: +# with standard_library.suspend_hooks(): +# try: +# import requests +# except ImportError: +# requests = None + + +class write_module(object): + """ + A context manager to streamline the tests. Creates a temp file for a + module designed to be imported by the ``with`` block, then removes it + afterwards. + """ + def __init__(self, code, tempdir): + self.code = code + self.tempdir = tempdir + + def __enter__(self): + print('Creating {0}test_imports_future_stdlib.py ...'.format(self.tempdir)) + with io.open(self.tempdir + 'test_imports_future_stdlib.py', 'wt', + encoding='utf-8') as f: + f.write(textwrap.dedent(self.code)) + sys.path.insert(0, self.tempdir) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """ + If an exception occurred, we leave the file for inspection. 
+ """ + sys.path.remove(self.tempdir) + if exc_type is None: + # No exception occurred + os.remove(self.tempdir + 'test_imports_future_stdlib.py') + try: + os.remove(self.tempdir + 'test_imports_future_stdlib.pyc') + except OSError: + pass + + +class TestRequests(CodeHandler): + """ + This class tests whether the requests module conflicts with the + standard library import hooks, as in issue #19. + """ + def test_remove_hooks_then_requests(self): + code = """ + from future import standard_library + standard_library.install_hooks() + + import builtins + import http.client + import html.parser + """ + with write_module(code, self.tempdir): + import test_imports_future_stdlib + standard_library.remove_hooks() + try: + import requests + except ImportError: + print("Requests doesn't seem to be available. Skipping requests test ...") + else: + r = requests.get('http://google.com') + self.assertTrue(r) + self.assertTrue(True) + + + def test_requests_cm(self): + """ + Tests whether requests can be used importing standard_library modules + previously with the hooks context manager + """ + code = """ + from future import standard_library + with standard_library.hooks(): + import builtins + import html.parser + import http.client + """ + with write_module(code, self.tempdir): + import test_imports_future_stdlib + try: + import requests + except ImportError: + print("Requests doesn't seem to be available. 
Skipping requests test ...") + else: + r = requests.get('http://google.com') + self.assertTrue(r) + self.assertTrue(True) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_future/test_standard_library.py b/tests/test_future/test_standard_library.py new file mode 100644 index 00000000..43f73aa5 --- /dev/null +++ b/tests/test_future/test_standard_library.py @@ -0,0 +1,637 @@ +""" +Tests for the future.standard_library module +""" + +from __future__ import absolute_import, print_function +from future import standard_library +from future import utils +from future.tests.base import unittest, CodeHandler, expectedFailurePY2 + +import sys +import tempfile +import copy +import textwrap +from subprocess import CalledProcessError + + +class TestStandardLibraryReorganization(CodeHandler): + + def setUp(self): + self.interpreter = sys.executable + standard_library.install_aliases() + super(TestStandardLibraryReorganization, self).setUp() + + def tearDown(self): + # standard_library.remove_hooks() + pass + + def test_can_import_several(self): + """ + This test failed in v0.12-pre if e.g. + future/standard_library/email/header.py contained: + + from future import standard_library + standard_library.remove_hooks() + """ + + import future.moves.urllib.parse as urllib_parse + import future.moves.urllib.request as urllib_request + + import http.server + for m in [urllib_parse, urllib_request, http.server]: + self.assertTrue(m is not None) + + def test_is_py2_stdlib_module(self): + """ + Tests whether the internal is_py2_stdlib_module function (called by the + sys.modules scrubbing functions) is reliable. 
+ """ + externalmodules = [standard_library, utils] + self.assertTrue(not any([standard_library.is_py2_stdlib_module(module) + for module in externalmodules])) + + py2modules = [sys, tempfile, copy, textwrap] + if utils.PY2: + # Debugging: + for module in py2modules: + if hasattr(module, '__file__'): + print(module.__file__, file=sys.stderr) + self.assertTrue(all([standard_library.is_py2_stdlib_module(module) + for module in py2modules])) + else: + self.assertTrue( + not any ([standard_library.is_py2_stdlib_module(module) + for module in py2modules])) + + # @unittest.skip("No longer relevant") + # def test_all_modules_identical(self): + # """ + # Tests whether all of the old imports in RENAMES are accessible + # under their new names. + # """ + # for (oldname, newname) in standard_library.RENAMES.items(): + # if newname == 'winreg' and sys.platform not in ['win32', 'win64']: + # continue + # if newname in standard_library.REPLACED_MODULES: + # # Skip this check for e.g. the stdlib's ``test`` module, + # # which we have replaced completely. + # continue + # oldmod = __import__(oldname) + # newmod = __import__(newname) + # if '.' not in oldname: + # self.assertEqual(oldmod, newmod) + + @expectedFailurePY2 + def test_suspend_hooks(self): + """ + Code like the try/except block here appears in Pyflakes v0.6.1. This + method tests whether suspend_hooks() works as advertised. + """ + example_PY2_check = False + with standard_library.suspend_hooks(): + # An example of fragile import code that we don't want to break: + try: + import builtins + except ImportError: + example_PY2_check = True + if utils.PY2: + self.assertTrue(example_PY2_check) + else: + self.assertFalse(example_PY2_check) + # The import should succeed again now: + import builtins + + @expectedFailurePY2 + def test_disable_hooks(self): + """ + Tests the old (deprecated) names. 
These deprecated aliases should be + removed by version 1.0 + """ + example_PY2_check = False + + standard_library.enable_hooks() # deprecated name + old_meta_path = copy.copy(sys.meta_path) + + standard_library.disable_hooks() + standard_library.scrub_future_sys_modules() + if utils.PY2: + self.assertTrue(len(old_meta_path) == len(sys.meta_path) + 1) + else: + self.assertTrue(len(old_meta_path) == len(sys.meta_path)) + + # An example of fragile import code that we don't want to break: + try: + import builtins + except ImportError: + example_PY2_check = True + if utils.PY2: + self.assertTrue(example_PY2_check) + else: + self.assertFalse(example_PY2_check) + + standard_library.install_hooks() + + # Imports should succeed again now: + import builtins + import html + if utils.PY2: + self.assertTrue(standard_library.detect_hooks()) + self.assertTrue(len(old_meta_path) == len(sys.meta_path)) + + @expectedFailurePY2 + def test_remove_hooks2(self): + """ + As above, but with the new names + """ + example_PY2_check = False + + standard_library.install_hooks() + old_meta_path = copy.copy(sys.meta_path) + + standard_library.remove_hooks() + standard_library.scrub_future_sys_modules() + if utils.PY2: + self.assertTrue(len(old_meta_path) == len(sys.meta_path) + 1) + else: + self.assertTrue(len(old_meta_path) == len(sys.meta_path)) + + # An example of fragile import code that we don't want to break: + try: + import builtins + except ImportError: + example_PY2_check = True + if utils.PY2: + self.assertTrue(example_PY2_check) + else: + self.assertFalse(example_PY2_check) + standard_library.install_hooks() + # The import should succeed again now: + import builtins + self.assertTrue(len(old_meta_path) == len(sys.meta_path)) + + def test_detect_hooks(self): + """ + Tests whether the future.standard_library.detect_hooks is doing + its job. 
+ """ + standard_library.install_hooks() + if utils.PY2: + self.assertTrue(standard_library.detect_hooks()) + + meta_path = copy.copy(sys.meta_path) + + standard_library.remove_hooks() + if utils.PY2: + self.assertEqual(len(meta_path), len(sys.meta_path) + 1) + self.assertFalse(standard_library.detect_hooks()) + + @unittest.skipIf(utils.PY3, 'not testing for old urllib on Py3') + def test_old_urllib_import(self): + """ + Tests whether an imported module can import the old urllib package. + Importing future.standard_library in a script should be possible and + not disrupt any uses of the old Py2 standard library names in modules + imported by that script. + """ + code1 = ''' + from future import standard_library + with standard_library.suspend_hooks(): + import module_importing_old_urllib + ''' + self._write_test_script(code1, 'runme.py') + code2 = ''' + import urllib + assert 'urlopen' in dir(urllib) + print('Import succeeded!') + ''' + self._write_test_script(code2, 'module_importing_old_urllib.py') + output = self._run_test_script('runme.py') + print(output) + self.assertTrue(True) + + def test_sys_intern(self): + """ + Py2's builtin intern() has been moved to the sys module. Tests + whether sys.intern is available. + """ + from sys import intern + if utils.PY3: + self.assertEqual(intern('hello'), 'hello') + else: + # intern() requires byte-strings on Py2: + self.assertEqual(intern(b'hello'), b'hello') + + def test_sys_maxsize(self): + """ + Tests whether sys.maxsize is available. + """ + from sys import maxsize + self.assertTrue(maxsize > 0) + + def test_itertools_filterfalse(self): + """ + Tests whether itertools.filterfalse is available. + """ + from itertools import filterfalse + not_div_by_3 = filterfalse(lambda x: x % 3 == 0, range(8)) + self.assertEqual(list(not_div_by_3), [1, 2, 4, 5, 7]) + + def test_itertools_zip_longest(self): + """ + Tests whether itertools.zip_longest is available. 
+ """ + from itertools import zip_longest + a = (1, 2) + b = [2, 4, 6] + self.assertEqual(list(zip_longest(a, b)), + [(1, 2), (2, 4), (None, 6)]) + + def test_ChainMap(self): + """ + Tests whether collections.ChainMap is available. + """ + from collections import ChainMap + cm = ChainMap() + + @unittest.expectedFailure + @unittest.skipIf(utils.PY3, 'generic import tests are for Py2 only') + def test_import_failure_from_module(self): + """ + Tests whether e.g. "import socketserver" succeeds in a module + imported by another module that has used and removed the stdlib hooks. + We want this to fail; the stdlib hooks should not bleed to imported + modules too without their explicitly invoking them. + """ + code1 = ''' + from future import standard_library + standard_library.install_hooks() + standard_library.remove_hooks() + import importme2 + ''' + code2 = ''' + import socketserver + print('Uh oh. importme2 should have raised an ImportError.') + ''' + self._write_test_script(code1, 'importme1.py') + self._write_test_script(code2, 'importme2.py') + with self.assertRaises(CalledProcessError): + output = self._run_test_script('importme1.py') + + # Disabled since v0.16.0: + # def test_configparser(self): + # import configparser + + def test_copyreg(self): + import copyreg + + def test_pickle(self): + import pickle + + def test_profile(self): + import profile + + def test_stringio(self): + from io import StringIO + s = StringIO(u'test') + for method in ['tell', 'read', 'seek', 'close', 'flush']: + self.assertTrue(hasattr(s, method)) + + def test_bytesio(self): + from io import BytesIO + s = BytesIO(b'test') + for method in ['tell', 'read', 'seek', 'close', 'flush', 'getvalue']: + self.assertTrue(hasattr(s, method)) + + def test_SimpleQueue(self): + from multiprocessing import SimpleQueue + sq = SimpleQueue() + self.assertTrue(sq.empty()) + sq.put('thing') + self.assertFalse(sq.empty()) + self.assertEqual(sq.get(), 'thing') + self.assertTrue(sq.empty()) + + def 
test_queue(self):
+        import queue
+        q = queue.Queue()
+        q.put('thing')
+        self.assertFalse(q.empty())
+
+    def test_reprlib(self):
+        import reprlib
+        self.assertTrue(True)
+
+    def test_socketserver(self):
+        import socketserver
+        self.assertTrue(True)
+
+    @unittest.skip("Not testing tkinter import (it may be installed separately from Python)")
+    def test_tkinter(self):
+        import tkinter
+        self.assertTrue(True)
+
+    def test_builtins(self):
+        import builtins
+        self.assertTrue(hasattr(builtins, 'tuple'))
+
+    @unittest.skip("ssl redirect support on pypi isn't working as expected for now ...")
+    def test_urllib_request_ssl_redirect(self):
+        """
+        This site redirects to https://...
+        It therefore requires ssl support.
+        """
+        import future.moves.urllib.request as urllib_request
+        from pprint import pprint
+        URL = 'http://pypi.python.org/pypi/{0}/json'
+        package = 'future'
+        r = urllib_request.urlopen(URL.format(package))
+        # pprint(r.read().decode('utf-8'))
+        self.assertTrue(True)
+
+    def test_moves_urllib_request_http(self):
+        """
+        This site (python-future.org) uses plain http (as of 2014-09-23).
+        """
+        import future.moves.urllib.request as urllib_request
+        from pprint import pprint
+        URL = 'http://python-future.org'
+        r = urllib_request.urlopen(URL)
+        data = r.read()
+        self.assertTrue(b'<html>' in data)
+
+    def test_urllib_request_http(self):
+        """
+        This site (python-future.org) uses plain http (as of 2014-09-23). 
+ """ + import urllib.request as urllib_request + from pprint import pprint + URL = 'http://python-future.org' + r = urllib_request.urlopen(URL) + data = r.read() + self.assertTrue(b'' in data) + + def test_html_import(self): + import html + import html.entities + import html.parser + self.assertTrue(True) + + def test_http_client_import(self): + import http.client + self.assertTrue(True) + + def test_other_http_imports(self): + import http + import http.server + import http.cookies + import http.cookiejar + self.assertTrue(True) + + def test_urllib_imports_moves(self): + import future.moves.urllib + import future.moves.urllib.parse + import future.moves.urllib.request + import future.moves.urllib.robotparser + import future.moves.urllib.error + import future.moves.urllib.response + self.assertTrue(True) + + def test_urllib_imports_install_aliases(self): + with standard_library.suspend_hooks(): + standard_library.install_aliases() + import urllib + import urllib.parse + import urllib.request + import urllib.robotparser + import urllib.error + import urllib.response + self.assertTrue(True) + + def test_urllib_imports_cm(self): + with standard_library.hooks(): + import urllib + import urllib.parse + import urllib.request + import urllib.robotparser + import urllib.error + import urllib.response + self.assertTrue(True) + + def test_urllib_imports_install_hooks(self): + standard_library.remove_hooks() + standard_library.install_hooks() + import urllib + import urllib.parse + import urllib.request + import urllib.robotparser + import urllib.error + import urllib.response + self.assertTrue(True) + + def test_underscore_prefixed_modules(self): + import _thread + if sys.version_info < (3, 9): + import _dummy_thread + import _markupbase + self.assertTrue(True) + + def test_reduce(self): + """ + reduce has been moved to the functools module + """ + import functools + self.assertEqual(functools.reduce(lambda x, y: x+y, range(1, 6)), 15) + + def 
test_collections_userstuff(self): + """ + UserDict, UserList, and UserString have been moved to the + collections module. + """ + from collections import UserDict + from collections import UserList + from collections import UserString + self.assertTrue(True) + + def test_reload(self): + """ + reload has been moved to the imp module + """ + # imp was deprecated in python 3.6 + if sys.version_info >= (3, 6): + import importlib as imp + else: + import imp + imp.reload(sys) + self.assertTrue(True) + + def test_install_aliases(self): + """ + Does the install_aliases() interface monkey-patch urllib etc. successfully? + """ + from future.standard_library import remove_hooks, install_aliases + remove_hooks() + install_aliases() + + from collections import Counter, OrderedDict # backported to Py2.6 + from collections import UserDict, UserList, UserString + + # Requires Python dbm support: + # import dbm + # import dbm.dumb + # import dbm.gnu + # import dbm.ndbm + + from itertools import filterfalse, zip_longest + + from subprocess import check_output # backported to Py2.6 + from subprocess import getoutput, getstatusoutput + + from sys import intern + + # test_support may not be available (e.g. on Anaconda Py2.6): + # import test.support + + import urllib.error + import urllib.parse + import urllib.request + import urllib.response + import urllib.robotparser + + self.assertTrue('urlopen' in dir(urllib.request)) + + +class TestFutureMoves(CodeHandler): + def test_future_moves_urllib_request(self): + from future.moves.urllib import request as urllib_request + functions = ['getproxies', + 'pathname2url', + 'proxy_bypass', + 'quote', + 'request_host', + 'splitattr', + 'splithost', + 'splitpasswd', + 'splitport', + 'splitquery', + 'splittag', + 'splittype', + 'splituser', + 'splitvalue', + 'thishost', + 'to_bytes', + 'unquote', + # 'unquote_to_bytes', # Is there an equivalent in the Py2 stdlib? 
+ 'unwrap', + 'url2pathname', + 'urlcleanup', + 'urljoin', + 'urlopen', + 'urlparse', + 'urlretrieve', + 'urlsplit', + 'urlunparse'] + self.assertTrue(all(fn in dir(urllib_request) for fn in functions)) + + def test_future_moves(self): + """ + Ensure everything is available from the future.moves interface that we + claim and expect. (Issue #104). + """ + from future.moves.collections import Counter, OrderedDict # backported to Py2.6 + from future.moves.collections import UserDict, UserList, UserString + + from future.moves import configparser + from future.moves import copyreg + + from future.moves.itertools import filterfalse, zip_longest + + from future.moves import html + import future.moves.html.entities + import future.moves.html.parser + + from future.moves import http + import future.moves.http.client + import future.moves.http.cookies + import future.moves.http.cookiejar + import future.moves.http.server + + from future.moves import queue + + from future.moves import socketserver + + from future.moves.subprocess import check_output # even on Py2.6 + from future.moves.subprocess import getoutput, getstatusoutput + + from future.moves.sys import intern + + from future.moves import urllib + import future.moves.urllib.error + import future.moves.urllib.parse + import future.moves.urllib.request + import future.moves.urllib.response + import future.moves.urllib.robotparser + + try: + # Is _winreg available on Py2? If so, ensure future.moves._winreg is available too: + import _winreg + except ImportError: + pass + else: + from future.moves import winreg + + from future.moves import xmlrpc + import future.moves.xmlrpc.client + import future.moves.xmlrpc.server + + from future.moves import _dummy_thread + from future.moves import _markupbase + from future.moves import _thread + + def test_future_moves_dbm(self): + """ + Do the dbm imports work? + """ + from future.moves import dbm + dbm.ndbm + from future.moves.dbm import dumb + try: + # Is gdbm available on Py2? 
If so, ensure dbm.gnu is available too: + import gdbm + except ImportError: + pass + else: + from future.moves.dbm import gnu + from future.moves.dbm import ndbm + + +# Running the following tkinter test causes the following bizarre test failure: +# +# ====================================================================== +# FAIL: test_open_default_encoding (future.tests.test_builtins.BuiltinTest) +# ---------------------------------------------------------------------- +# Traceback (most recent call last): +# File "/home/user/Install/BleedingEdge/python-future/future/tests/test_builtins.py", line 1219, in test_open_default_encoding +# self.assertEqual(fp.encoding, current_locale_encoding) +# AssertionError: 'ANSI_X3.4-1968' != 'ISO-8859-1' +# +# ---------------------------------------------------------------------- +# +# def test_future_moves_tkinter(self): +# """ +# Do the tkinter imports work? +# """ +# from future.moves import tkinter +# from future.moves.tkinter import dialog +# from future.moves.tkinter import filedialog +# from future.moves.tkinter import scrolledtext +# from future.moves.tkinter import simpledialog +# from future.moves.tkinter import tix +# from future.moves.tkinter import constants +# from future.moves.tkinter import dnd +# from future.moves.tkinter import colorchooser +# from future.moves.tkinter import commondialog +# from future.moves.tkinter import font +# from future.moves.tkinter import messagebox + +if __name__ == '__main__': + unittest.main() diff --git a/future/tests/test_str.py b/tests/test_future/test_str.py similarity index 61% rename from future/tests/test_str.py rename to tests/test_future/test_str.py index dd351878..51085481 100644 --- a/future/tests/test_str.py +++ b/tests/test_future/test_str.py @@ -6,7 +6,7 @@ from __future__ import absolute_import, unicode_literals, print_function from future.builtins import * from future import utils -from future.tests.base import unittest +from future.tests.base import unittest, 
expectedFailurePY2 import os @@ -19,6 +19,14 @@ def test_str(self): self.assertEqual(str('blah'), u'blah') # u'' prefix: Py3.3 and Py2 only self.assertEqual(str(b'1234'), "b'1234'") + def test_bool_str(self): + s1 = str(u'abc') + s2 = u'abc' + s3 = str(u'') + s4 = u'' + self.assertEqual(bool(s1), bool(s2)) + self.assertEqual(bool(s3), bool(s4)) + def test_os_path_join(self): """ Issue #15: can't os.path.join(u'abc', str(u'def')) @@ -78,9 +86,32 @@ def test_str_is_str(self): def test_str_fromhex(self): self.assertFalse(hasattr(str, 'fromhex')) + def test_str_hasattr_decode(self): + """ + This test tests whether hasattr(s, 'decode') is False, like it is on Py3. + + Sometimes code (such as http.client in Py3.3) checks hasattr(mystring, + 'decode') to determine if a string-like thing needs encoding. It would + be nice to have this return False so the string can be treated on Py2 + like a Py3 string. + """ + s = str(u'abcd') + self.assertFalse(hasattr(s, 'decode')) + self.assertTrue(hasattr(s, 'encode')) + def test_isinstance_str(self): self.assertTrue(isinstance(str('blah'), str)) + def test_isinstance_str_subclass(self): + """ + Issue #89 + """ + value = str(u'abc') + class Magic(str): + pass + self.assertTrue(isinstance(value, str)) + self.assertFalse(isinstance(value, Magic)) + def test_str_getitem(self): s = str('ABCD') self.assertNotEqual(s[0], 65) @@ -170,6 +201,30 @@ def test_str_join_bytes(self): with self.assertRaises(TypeError): s.join(byte_strings2) + def test_str_join_staticmethod(self): + """ + Issue #33 + """ + c = str.join('-', ['a', 'b']) + self.assertEqual(c, 'a-b') + self.assertEqual(type(c), str) + + def test_str_join_staticmethod_workaround_1(self): + """ + Issue #33 + """ + c = str('-').join(['a', 'b']) + self.assertEqual(c, 'a-b') + self.assertEqual(type(c), str) + + def test_str_join_staticmethod_workaround_2(self): + """ + Issue #33 + """ + c = str.join(str('-'), ['a', 'b']) + self.assertEqual(c, 'a-b') + self.assertEqual(type(c), str) + def 
test_str_replace(self): s = str('ABCD') c = s.replace('A', 'F') @@ -203,7 +258,7 @@ def test_str_contains_something(self): if utils.PY2: self.assertTrue(b'A' in s) with self.assertRaises(TypeError): - bytes(b'A') in s + bytes(b'A') in s with self.assertRaises(TypeError): 65 in s # unlike bytes @@ -308,6 +363,29 @@ def test_eq(self): self.assertFalse(b'ABCD' == s) self.assertFalse(bytes(b'ABCD') == s) + # We want to ensure comparison against unknown types return + # NotImplemented so that the interpreter can rerun the test with the + # other class. We expect the operator to return False if both return + # NotImplemented. + class OurCustomString(object): + def __init__(self, string): + self.string = string + + def __eq__(self, other): + return NotImplemented + + our_str = OurCustomString("foobar") + new_str = str("foobar") + + self.assertFalse(our_str == new_str) + self.assertFalse(new_str == our_str) + self.assertIs(new_str.__eq__(our_str), NotImplemented) + self.assertIs(our_str.__eq__(new_str), NotImplemented) + + def test_hash(self): + s = str('ABCD') + self.assertIsInstance(hash(s),int) + def test_ne(self): s = str('ABCD') self.assertNotEqual('A', s) @@ -327,10 +405,6 @@ def test_cmp(self): s > 3 with self.assertRaises(TypeError): s < 1000 - with self.assertRaises(TypeError): - s > b'XYZ' - with self.assertRaises(TypeError): - s < b'XYZ' with self.assertRaises(TypeError): s <= 3 with self.assertRaises(TypeError): @@ -378,5 +452,140 @@ def test_rmul(self): with self.assertRaises(TypeError): (3.3 + 3j) * s + @unittest.skip('Fails on Python <= 2.7.6 due to string subclass slicing bug') + def test_slice(self): + """ + Do slices return newstr objects? 
+ """ + s = str(u'abcd') + self.assertEqual(s[:2], u'ab') + self.assertEqual(type(s[:2]), str) + self.assertEqual(s[-2:], u'cd') + self.assertEqual(type(s[-2:]), str) + + @unittest.skip('Fails on Python <= 2.7.6 due to string subclass slicing bug') + def test_subclassing(self): + """ + Can newstr be subclassed and do str methods then return instances of + the same class? (This is the Py3 behaviour). + """ + class SubClass(str): + pass + s = SubClass(u'abcd') + self.assertEqual(type(s), SubClass) + self.assertEqual(type(s + s), str) + self.assertEqual(type(s[0]), str) + self.assertEqual(type(s[:2]), str) + self.assertEqual(type(s.join([u'_', u'_', u'_'])), str) + + def test_subclassing_2(self): + """ + Tests __new__ method in subclasses. Fails in versions <= 0.11.4 + """ + class SubClass(str): + def __new__(cls, *args, **kwargs): + self = str.__new__(cls, *args, **kwargs) + assert type(self) == SubClass + return self + s = SubClass(u'abcd') + self.assertTrue(True) + + # From Python 3.3: test_unicode.py + def checkequalnofix(self, result, object, methodname, *args): + method = getattr(object, methodname) + realresult = method(*args) + self.assertEqual(realresult, result) + self.assertTrue(type(realresult) is type(result)) + + # if the original is returned make sure that + # this doesn't happen with subclasses + if realresult is object: + class usub(str): + def __repr__(self): + return 'usub(%r)' % str.__repr__(self) + object = usub(object) + method = getattr(object, methodname) + realresult = method(*args) + self.assertEqual(realresult, result) + self.assertTrue(object is not realresult) + + type2test = str + + def test_maketrans_translate(self): + # these work with plain translate() + self.checkequalnofix('bbbc', 'abababc', 'translate', + {ord('a'): None}) + self.checkequalnofix('iiic', 'abababc', 'translate', + {ord('a'): None, ord('b'): ord('i')}) + self.checkequalnofix('iiix', 'abababc', 'translate', + {ord('a'): None, ord('b'): ord('i'), ord('c'): 'x'}) + 
self.checkequalnofix('c', 'abababc', 'translate', + {ord('a'): None, ord('b'): ''}) + self.checkequalnofix('xyyx', 'xzx', 'translate', + {ord('z'): 'yy'}) + # this needs maketrans() + self.checkequalnofix('abababc', 'abababc', 'translate', + {'b': ''}) + tbl = self.type2test.maketrans({'a': None, 'b': ''}) + self.checkequalnofix('c', 'abababc', 'translate', tbl) + # test alternative way of calling maketrans() + tbl = self.type2test.maketrans('abc', 'xyz', 'd') + self.checkequalnofix('xyzzy', 'abdcdcbdddd', 'translate', tbl) + + self.assertRaises(TypeError, self.type2test.maketrans) + self.assertRaises(ValueError, self.type2test.maketrans, 'abc', 'defg') + self.assertRaises(TypeError, self.type2test.maketrans, 2, 'def') + self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 2) + self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 'def', 2) + self.assertRaises(ValueError, self.type2test.maketrans, {'xy': 2}) + self.assertRaises(TypeError, self.type2test.maketrans, {(1,): 2}) + + self.assertRaises(TypeError, 'hello'.translate) + self.assertRaises(TypeError, 'abababc'.translate, 'abc', 'xyz') + + @expectedFailurePY2 + def test_multiple_inheritance(self): + """ + Issue #96 (for newstr instead of newobject) + """ + if utils.PY2: + from collections import Container + else: + from collections.abc import Container + + class Base(str): + pass + + class Foo(Base, Container): + def __contains__(self, item): + return False + + @expectedFailurePY2 + def test_with_metaclass_and_str(self): + """ + Issue #91 (for newstr instead of newobject) + """ + from future.utils import with_metaclass + + class MetaClass(type): + pass + + class TestClass(with_metaclass(MetaClass, str)): + pass + + def test_surrogateescape_encoding(self): + """ + Tests whether surrogateescape encoding works correctly. 
+ """ + pairs = [(u'\udcc3', b'\xc3'), + (u'\udcff', b'\xff')] + + for (s, b) in pairs: + encoded = str(s).encode('utf-8', 'surrogateescape') + self.assertEqual(b, encoded) + self.assertTrue(isinstance(encoded, bytes)) + self.assertEqual(s, encoded.decode('utf-8', 'surrogateescape')) + + if __name__ == '__main__': unittest.main() diff --git a/tests/test_future/test_super.py b/tests/test_future/test_super.py new file mode 100644 index 00000000..3cb23d69 --- /dev/null +++ b/tests/test_future/test_super.py @@ -0,0 +1,359 @@ +"""Unit tests for new super() implementation.""" + +from __future__ import absolute_import, division, unicode_literals +import sys + +from future.tests.base import unittest, skip26, expectedFailurePY2 +from future import utils +from future.builtins import super + + +class A(object): + def f(self): + return 'A' + @classmethod + def cm(cls): + return (cls, 'A') + +class B(A): + def f(self): + return super().f() + 'B' + @classmethod + def cm(cls): + return (cls, super().cm(), 'B') + +class C(A): + def f(self): + return super().f() + 'C' + @classmethod + def cm(cls): + return (cls, super().cm(), 'C') + +class D(C, B): + def f(self): + return super().f() + 'D' + def cm(cls): + return (cls, super().cm(), 'D') + +class E(D): + pass + +class F(E): + f = E.f + +class G(A): + pass + + +class TestSuper(unittest.TestCase): + + def test_basics_working(self): + self.assertEqual(D().f(), 'ABCD') + + def test_class_getattr_working(self): + self.assertEqual(D.f(D()), 'ABCD') + + def test_subclass_no_override_working(self): + self.assertEqual(E().f(), 'ABCD') + self.assertEqual(E.f(E()), 'ABCD') + + @expectedFailurePY2 # not working yet: infinite loop + def test_unbound_method_transfer_working(self): + self.assertEqual(F().f(), 'ABCD') + self.assertEqual(F.f(F()), 'ABCD') + + def test_class_methods_still_working(self): + self.assertEqual(A.cm(), (A, 'A')) + self.assertEqual(A().cm(), (A, 'A')) + self.assertEqual(G.cm(), (G, 'A')) + self.assertEqual(G().cm(), (G, 
'A')) + + def test_super_in_class_methods_working(self): + d = D() + self.assertEqual(d.cm(), (d, (D, (D, (D, 'A'), 'B'), 'C'), 'D')) + e = E() + self.assertEqual(e.cm(), (e, (E, (E, (E, 'A'), 'B'), 'C'), 'D')) + + def test_super_with_closure(self): + # Issue4360: super() did not work in a function that + # contains a closure + class E(A): + def f(self): + def nested(): + self + return super().f() + 'E' + + self.assertEqual(E().f(), 'AE') + + # We declare this test invalid: __class__ should be a class. + # def test___class___set(self): + # # See issue #12370 + # class X(A): + # def f(self): + # return super().f() + # __class__ = 413 + # x = X() + # self.assertEqual(x.f(), 'A') + # self.assertEqual(x.__class__, 413) + + @unittest.skipIf(utils.PY2, "no __class__ on Py2") + def test___class___instancemethod(self): + # See issue #14857 + class X(object): + def f(self): + return __class__ + self.assertIs(X().f(), X) + + @unittest.skipIf(utils.PY2, "no __class__ on Py2") + def test___class___classmethod(self): + # See issue #14857 + class X(object): + @classmethod + def f(cls): + return __class__ + self.assertIs(X.f(), X) + + @unittest.skipIf(utils.PY2, "no __class__ on Py2") + def test___class___staticmethod(self): + # See issue #14857 + class X(object): + @staticmethod + def f(): + return __class__ + self.assertIs(X.f(), X) + + def test_obscure_super_errors(self): + def f(): + super() + self.assertRaises(RuntimeError, f) + def f(x): + del x + super() + self.assertRaises(RuntimeError, f, None) + # class X(object): + # def f(x): + # nonlocal __class__ + # del __class__ + # super() + # self.assertRaises(RuntimeError, X().f) + + def test_cell_as_self(self): + class X(object): + def meth(self): + super() + + def f(): + k = X() + def g(): + return k + return g + c = f().__closure__[0] + self.assertRaises(TypeError, X.meth, c) + + def test_properties(self): + class Harmless(object): + bomb = '' + + def walk(self): + return self.bomb + + class Dangerous(Harmless): + @property 
+ def bomb(self): + raise Exception("Kaboom") + + def walk(self): + return super().walk() + + class Elite(Dangerous): + bomb = 'Defused' + + self.assertEqual(Elite().walk(), 'Defused') + + def test_metaclass(self): + class Meta(type): + def __init__(cls, name, bases, clsdict): + super().__init__(name, bases, clsdict) + + try: + class Base(object): + __metaclass__ = Meta + except Exception as e: + self.fail('raised %s with a custom metaclass' + % type(e).__name__) + + +class TestSuperFromTestDescrDotPy(unittest.TestCase): + """ + These are from Python 3.3.5/Lib/test/test_descr.py + """ + @skip26 + def test_classmethods(self): + # Testing class methods... + class C(object): + def foo(*a): return a + goo = classmethod(foo) + c = C() + self.assertEqual(C.goo(1), (C, 1)) + self.assertEqual(c.goo(1), (C, 1)) + self.assertEqual(c.foo(1), (c, 1)) + class D(C): + pass + d = D() + self.assertEqual(D.goo(1), (D, 1)) + self.assertEqual(d.goo(1), (D, 1)) + self.assertEqual(d.foo(1), (d, 1)) + self.assertEqual(D.foo(d, 1), (d, 1)) + # Test for a specific crash (SF bug 528132) + def f(cls, arg): return (cls, arg) + ff = classmethod(f) + self.assertEqual(ff.__get__(0, int)(42), (int, 42)) + self.assertEqual(ff.__get__(0)(42), (int, 42)) + + # Test super() with classmethods (SF bug 535444) + self.assertEqual(C.goo.__self__, C) + self.assertEqual(D.goo.__self__, D) + self.assertEqual(super(D,D).goo.__self__, D) + self.assertEqual(super(D,d).goo.__self__, D) + self.assertEqual(super(D,D).goo(), (D,)) + self.assertEqual(super(D,d).goo(), (D,)) + + # Verify that a non-callable will raise + meth = classmethod(1).__get__(1) + self.assertRaises(TypeError, meth) + + # Verify that classmethod() doesn't allow keyword args + try: + classmethod(f, kw=1) + except TypeError: + pass + else: + self.fail("classmethod shouldn't accept keyword args") + + # cm = classmethod(f) + # self.assertEqual(cm.__dict__, {}) + # cm.x = 42 + # self.assertEqual(cm.x, 42) + # self.assertEqual(cm.__dict__, {"x" : 
42}) + # del cm.x + # self.assertTrue(not hasattr(cm, "x")) + + def test_supers(self): + # Testing super... + + class A(object): + def meth(self, a): + return "A(%r)" % a + + self.assertEqual(A().meth(1), "A(1)") + + class B(A): + def __init__(self): + self.__super = super(B, self) + def meth(self, a): + return "B(%r)" % a + self.__super.meth(a) + + self.assertEqual(B().meth(2), "B(2)A(2)") + + class C(A): + def meth(self, a): + return "C(%r)" % a + self.__super.meth(a) + C._C__super = super(C) + + self.assertEqual(C().meth(3), "C(3)A(3)") + + class D(C, B): + def meth(self, a): + return "D(%r)" % a + super(D, self).meth(a) + + self.assertEqual(D().meth(4), "D(4)C(4)B(4)A(4)") + + # # Test for subclassing super + + # class mysuper(super): + # def __init__(self, *args): + # return super(mysuper, self).__init__(*args) + + # class E(D): + # def meth(self, a): + # return "E(%r)" % a + mysuper(E, self).meth(a) + + # self.assertEqual(E().meth(5), "E(5)D(5)C(5)B(5)A(5)") + + # class F(E): + # def meth(self, a): + # s = self.__super # == mysuper(F, self) + # return "F(%r)[%s]" % (a, s.__class__.__name__) + s.meth(a) + # F._F__super = mysuper(F) + + # self.assertEqual(F().meth(6), "F(6)[mysuper]E(6)D(6)C(6)B(6)A(6)") + + # Make sure certain errors are raised + + try: + super(D, 42) + except TypeError: + pass + else: + self.fail("shouldn't allow super(D, 42)") + + try: + super(D, C()) + except TypeError: + pass + else: + self.fail("shouldn't allow super(D, C())") + + try: + super(D).__get__(12) + except TypeError: + pass + else: + self.fail("shouldn't allow super(D).__get__(12)") + + try: + super(D).__get__(C()) + except TypeError: + pass + else: + self.fail("shouldn't allow super(D).__get__(C())") + + # Make sure data descriptors can be overridden and accessed via super + # (new feature in Python 2.3) + + class DDbase(object): + def getx(self): return 42 + x = property(getx) + + class DDsub(DDbase): + def getx(self): return "hello" + x = property(getx) + + dd = DDsub() + 
self.assertEqual(dd.x, "hello") + self.assertEqual(super(DDsub, dd).x, 42) + + # Ensure that super() lookup of descriptor from classmethod + # works (SF ID# 743627) + + class Base(object): + aProp = property(lambda self: "foo") + + class Sub(Base): + @classmethod + def test(klass): + return super(Sub,klass).aProp + + self.assertEqual(Sub.test(), Base.aProp) + + # Verify that super() doesn't allow keyword args + try: + super(Base, kw=1) + except TypeError: + pass + else: + self.assertEqual("super shouldn't accept keyword args") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_future/test_surrogateescape.py b/tests/test_future/test_surrogateescape.py new file mode 100644 index 00000000..7789ce9d --- /dev/null +++ b/tests/test_future/test_surrogateescape.py @@ -0,0 +1,142 @@ +# -*- coding: utf-8 -*- +""" +Tests for the surrogateescape codec +""" + +from __future__ import absolute_import, division, unicode_literals +from future.builtins import (bytes, dict, int, range, round, str, super, + ascii, chr, hex, input, next, oct, open, pow, + filter, map, zip) +from future.utils.surrogateescape import register_surrogateescape +from future.tests.base import unittest, expectedFailurePY26, expectedFailurePY2 + + +class TestSurrogateEscape(unittest.TestCase): + def setUp(self): + register_surrogateescape() + + @expectedFailurePY26 # Python 2.6 str.decode() takes no keyword args + def test_surrogateescape(self): + """ + From the backport of the email package + """ + s = b'From: foo@bar.com\nTo: baz\nMime-Version: 1.0\nContent-Type: text/plain; charset=utf-8\nContent-Transfer-Encoding: base64\n\ncMO2c3RhbA\xc3\xa1=\n' + u = 'From: foo@bar.com\nTo: baz\nMime-Version: 1.0\nContent-Type: text/plain; charset=utf-8\nContent-Transfer-Encoding: base64\n\ncMO2c3RhbA\udcc3\udca1=\n' + s2 = s.decode('ASCII', errors='surrogateescape') + self.assertEqual(s2, u) + + def test_encode_ascii_surrogateescape(self): + """ + This crops up in the email module. 
It would be nice if it worked ... + """ + payload = str(u'cMO2c3RhbA\udcc3\udca1=\n') + b = payload.encode('ascii', 'surrogateescape') + self.assertEqual(b, b'cMO2c3RhbA\xc3\xa1=\n') + + def test_encode_ascii_unicode(self): + """ + Verify that exceptions are raised properly. + """ + self.assertRaises(UnicodeEncodeError, u'\N{SNOWMAN}'.encode, 'US-ASCII', 'surrogateescape') + + @expectedFailurePY2 + def test_encode_ascii_surrogateescape_non_newstr(self): + """ + As above but without a newstr object. Fails on Py2. + """ + payload = u'cMO2c3RhbA\udcc3\udca1=\n' + b = payload.encode('ascii', 'surrogateescape') + self.assertEqual(b, b'cMO2c3RhbA\xc3\xa1=\n') + + +class SurrogateEscapeTest(unittest.TestCase): + """ + These tests are from Python 3.3's test suite + """ + def setUp(self): + register_surrogateescape() + + def test_utf8(self): + # Bad byte + self.assertEqual(b"foo\x80bar".decode("utf-8", "surrogateescape"), + "foo\udc80bar") + self.assertEqual(str("foo\udc80bar").encode("utf-8", "surrogateescape"), + b"foo\x80bar") + # bad-utf-8 encoded surrogate + # self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "surrogateescape"), + # "\udced\udcb0\udc80") + self.assertEqual(str("\udced\udcb0\udc80").encode("utf-8", "surrogateescape"), + b"\xed\xb0\x80") + + def test_ascii(self): + # bad byte + self.assertEqual(b"foo\x80bar".decode("ascii", "surrogateescape"), + "foo\udc80bar") + # Fails: + # self.assertEqual("foo\udc80bar".encode("ascii", "surrogateescape"), + # b"foo\x80bar") + + @expectedFailurePY2 + def test_charmap(self): + # bad byte: \xa5 is unmapped in iso-8859-3 + self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "surrogateescape"), + "foo\udca5bar") + self.assertEqual("foo\udca5bar".encode("iso-8859-3", "surrogateescape"), + b"foo\xa5bar") + + def test_latin1(self): + # Issue6373 + self.assertEqual("\udce4\udceb\udcef\udcf6\udcfc".encode("latin-1", "surrogateescape"), + b"\xe4\xeb\xef\xf6\xfc") + + # FIXME: + @expectedFailurePY2 + def 
test_encoding_works_normally(self): + """ + Test that encoding into various encodings (particularly utf-16) + still works with the surrogateescape error handler in action ... + """ + TEST_UNICODE_STR = u'ℝεα∂@ßʟ℮ ☂ℯṧт υηḯ¢☺ḓ℮' + # Tk icon as a .gif: + TEST_BYTE_STR = b'GIF89a\x0e\x00\x0b\x00\x80\xff\x00\xff\x00\x00\xc0\xc0\xc0!\xf9\x04\x01\x00\x00\x01\x00,\x00\x00\x00\x00\x0e\x00\x0b\x00@\x02\x1f\x0c\x8e\x10\xbb\xcan\x90\x99\xaf&\xd8\x1a\xce\x9ar\x06F\xd7\xf1\x90\xa1c\x9e\xe8\x84\x99\x89\x97\xa2J\x01\x00;\x1a\x14\x00;;\xba\nD\x14\x00\x00;;' + # s1 = 'quéstionable' + s1 = TEST_UNICODE_STR + b1 = s1.encode('utf-8') + b2 = s1.encode('utf-16') + # b3 = s1.encode('latin-1') + self.assertEqual(b1, str(s1).encode('utf-8', 'surrogateescape')) + self.assertEqual(b2, str(s1).encode('utf-16', 'surrogateescape')) + # self.assertEqual(b3, str(s1).encode('latin-1', 'surrogateescape')) + + s2 = 'きたないのよりきれいな方がいい' + b4 = s2.encode('utf-8') + b5 = s2.encode('utf-16') + b6 = s2.encode('shift-jis') + self.assertEqual(b4, str(s2).encode('utf-8', 'surrogateescape')) + self.assertEqual(b5, str(s2).encode('utf-16', 'surrogateescape')) + self.assertEqual(b6, str(s2).encode('shift-jis', 'surrogateescape')) + + def test_decoding_works_normally(self): + """ + Test that decoding into various encodings (particularly utf-16) + still works with the surrogateescape error handler in action ... 
+ """ + s1 = 'quéstionable' + b1 = s1.encode('utf-8') + b2 = s1.encode('utf-16') + b3 = s1.encode('latin-1') + self.assertEqual(s1, b1.decode('utf-8', 'surrogateescape')) + self.assertEqual(s1, b2.decode('utf-16', 'surrogateescape')) + self.assertEqual(s1, b3.decode('latin-1', 'surrogateescape')) + + s2 = '文' + b4 = s2.encode('utf-8') + b5 = s2.encode('utf-16') + b6 = s2.encode('shift-jis') + self.assertEqual(s2, b4.decode('utf-8', 'surrogateescape')) + self.assertEqual(s2, b5.decode('utf-16', 'surrogateescape')) + self.assertEqual(s2, b6.decode('shift-jis', 'surrogateescape')) + + +if __name__ == '__main__': + unittest.main() diff --git a/libfuturize/test_scripts/py3/test_urllib.py b/tests/test_future/test_urllib.py similarity index 79% rename from libfuturize/test_scripts/py3/test_urllib.py rename to tests/test_future/test_urllib.py index a2d3282d..64e89760 100644 --- a/libfuturize/test_scripts/py3/test_urllib.py +++ b/tests/test_future/test_urllib.py @@ -1,20 +1,24 @@ -"""Regresssion tests for urllib""" +"""Regression tests for urllib""" +from __future__ import absolute_import, division, unicode_literals -import urllib.parse -import urllib.request -import urllib.error -import http.client -import email.message import io -import unittest -from test import support import os import sys import tempfile - +from nturl2path import url2pathname, pathname2url from base64 import b64encode import collections +from future.builtins import bytes, chr, hex, open, range, str, int +from future.backports.urllib import parse as urllib_parse +from future.backports.urllib import request as urllib_request +from future.backports.urllib import error as urllib_error +from future.backports.http import client as http_client +from future.backports.test import support +from future.backports.email import message as email_message +from future.tests.base import unittest, skip26, expectedFailurePY26 + + def hexescape(char): """Escape char as RFC 2396 specifies""" hex_repr = 
hex(ord(char))[2:].upper() @@ -24,16 +28,18 @@ def hexescape(char): # Shortcut for testing FancyURLopener _urlopener = None + + def urlopen(url, data=None, proxies=None): """urlopen(url [, data]) -> open file-like object""" global _urlopener if proxies is not None: - opener = urllib.request.FancyURLopener(proxies=proxies) + opener = urllib_request.FancyURLopener(proxies=proxies) elif not _urlopener: with support.check_warnings( ('FancyURLopener style of invoking requests is deprecated.', DeprecationWarning)): - opener = urllib.request.FancyURLopener() + opener = urllib_request.FancyURLopener() _urlopener = opener else: opener = _urlopener @@ -70,7 +76,7 @@ def close(self): if self.io_refs == 0: io.BytesIO.close(self) - class FakeHTTPConnection(http.client.HTTPConnection): + class FakeHTTPConnection(http_client.HTTPConnection): # buffer to store data for verification in urlopen tests. buf = None @@ -78,11 +84,11 @@ class FakeHTTPConnection(http.client.HTTPConnection): def connect(self): self.sock = FakeSocket(fakedata) - self._connection_class = http.client.HTTPConnection - http.client.HTTPConnection = FakeHTTPConnection + self._connection_class = http_client.HTTPConnection + http_client.HTTPConnection = FakeHTTPConnection def unfakehttp(self): - http.client.HTTPConnection = self._connection_class + http_client.HTTPConnection = self._connection_class class urlopen_FileTests(unittest.TestCase): @@ -147,7 +153,7 @@ def test_close(self): self.returned_obj.close() def test_info(self): - self.assertIsInstance(self.returned_obj.info(), email.message.Message) + self.assertIsInstance(self.returned_obj.info(), email_message.Message) def test_geturl(self): self.assertEqual(self.returned_obj.geturl(), self.pathname) @@ -165,7 +171,7 @@ def test_iter(self): self.assertEqual(line, self.text) def test_relativelocalfile(self): - self.assertRaises(ValueError,urllib.request.urlopen,'./' + self.pathname) + self.assertRaises(ValueError,urllib_request.urlopen,'./' + self.pathname) 
class ProxyTests(unittest.TestCase): @@ -184,12 +190,12 @@ def tearDown(self): def test_getproxies_environment_keep_no_proxies(self): self.env.set('NO_PROXY', 'localhost') - proxies = urllib.request.getproxies_environment() + proxies = urllib_request.getproxies_environment() # getproxies_environment use lowered case truncated (no '_proxy') keys self.assertEqual('localhost', proxies['no']) # List of no_proxies with space. self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com') - self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com')) + self.assertTrue(urllib_request.proxy_bypass_environment('anotherdomain.com')) class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin): """Test urlopen() opening a fake http connection.""" @@ -210,7 +216,7 @@ def test_url_fragment(self): url = 'http://docs.python.org/library/urllib.html#OK' self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!") try: - fp = urllib.request.urlopen(url) + fp = urllib_request.urlopen(url) self.assertEqual(fp.geturl(), url) finally: self.unfakehttp() @@ -223,14 +229,17 @@ def test_willclose(self): finally: self.unfakehttp() + @expectedFailurePY26 def test_read_0_9(self): # "0.9" response accepted (but not "simple responses" without # a status line) self.check_read(b"0.9") + @expectedFailurePY26 def test_read_1_0(self): self.check_read(b"1.0") + @expectedFailurePY26 def test_read_1_1(self): self.check_read(b"1.1") @@ -257,7 +266,7 @@ def test_invalid_redirect(self): Content-Type: text/html; charset=iso-8859-1 ''') try: - self.assertRaises(urllib.error.HTTPError, urlopen, + self.assertRaises(urllib_error.HTTPError, urlopen, "http://python.org/") finally: self.unfakehttp() @@ -304,6 +313,7 @@ def test_ftp_nonexisting(self): urlopen('ftp://localhost/a/file/which/doesnot/exists.py') + @expectedFailurePY26 def test_userpass_inurl(self): self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!") try: @@ -315,12 +325,13 @@ def test_userpass_inurl(self): finally: self.unfakehttp() + 
@expectedFailurePY26 def test_userpass_inurl_w_spaces(self): self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!") try: userpass = "a b:c d" url = "http://{0}@python.org/".format(userpass) - fakehttp_wrapper = http.client.HTTPConnection + fakehttp_wrapper = http_client.HTTPConnection authorization = ("Authorization: Basic %s\r\n" % b64encode(userpass.encode("ASCII")).decode("ASCII")) fp = urlopen(url) @@ -336,7 +347,7 @@ def test_userpass_inurl_w_spaces(self): def test_URLopener_deprecation(self): with support.check_warnings(('',DeprecationWarning)): - urllib.request.URLopener() + urllib_request.URLopener() class urlretrieve_FileTests(unittest.TestCase): """Test urllib.urlretrieve() on local files""" @@ -374,7 +385,7 @@ def constructLocalFileUrl(self, filePath): filePath.encode("utf-8") except UnicodeEncodeError: raise unittest.SkipTest("filePath is not encodable to utf8") - return "file://%s" % urllib.request.pathname2url(filePath) + return "file://%s" % urllib_request.pathname2url(filePath) def createNewTempFile(self, data=b""): """Creates a new temporary file containing the specified data, @@ -398,9 +409,9 @@ def registerFileForCleanUp(self, fileName): def test_basic(self): # Make sure that a local file just gets its own location returned and # a headers value is returned. - result = urllib.request.urlretrieve("file:%s" % support.TESTFN) + result = urllib_request.urlretrieve("file:%s" % support.TESTFN) self.assertEqual(result[0], support.TESTFN) - self.assertIsInstance(result[1], email.message.Message, + self.assertIsInstance(result[1], email_message.Message, "did not get a email.message.Message instance " "as second returned value") @@ -408,7 +419,7 @@ def test_copy(self): # Test that setting the filename argument works. 
second_temp = "%s.2" % support.TESTFN self.registerFileForCleanUp(second_temp) - result = urllib.request.urlretrieve(self.constructLocalFileUrl( + result = urllib_request.urlretrieve(self.constructLocalFileUrl( support.TESTFN), second_temp) self.assertEqual(second_temp, result[0]) self.assertTrue(os.path.exists(second_temp), "copy of the file was not " @@ -432,7 +443,7 @@ def hooktester(block_count, block_read_size, file_size, count_holder=[0]): count_holder[0] = count_holder[0] + 1 second_temp = "%s.2" % support.TESTFN self.registerFileForCleanUp(second_temp) - urllib.request.urlretrieve( + urllib_request.urlretrieve( self.constructLocalFileUrl(support.TESTFN), second_temp, hooktester) @@ -442,7 +453,7 @@ def test_reporthook_0_bytes(self): def hooktester(block_count, block_read_size, file_size, _report=report): _report.append((block_count, block_read_size, file_size)) srcFileName = self.createNewTempFile() - urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName), + urllib_request.urlretrieve(self.constructLocalFileUrl(srcFileName), support.TESTFN, hooktester) self.assertEqual(len(report), 1) self.assertEqual(report[0][2], 0) @@ -455,7 +466,7 @@ def test_reporthook_5_bytes(self): def hooktester(block_count, block_read_size, file_size, _report=report): _report.append((block_count, block_read_size, file_size)) srcFileName = self.createNewTempFile(b"x" * 5) - urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName), + urllib_request.urlretrieve(self.constructLocalFileUrl(srcFileName), support.TESTFN, hooktester) self.assertEqual(len(report), 2) self.assertEqual(report[0][2], 5) @@ -469,7 +480,7 @@ def test_reporthook_8193_bytes(self): def hooktester(block_count, block_read_size, file_size, _report=report): _report.append((block_count, block_read_size, file_size)) srcFileName = self.createNewTempFile(b"x" * 8193) - urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName), + 
urllib_request.urlretrieve(self.constructLocalFileUrl(srcFileName), support.TESTFN, hooktester) self.assertEqual(len(report), 3) self.assertEqual(report[0][2], 8193) @@ -481,6 +492,7 @@ def hooktester(block_count, block_read_size, file_size, _report=report): class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin): """Test urllib.urlretrieve() using fake http connections""" + @skip26 def test_short_content_raises_ContentTooShortError(self): self.fakehttp(b'''HTTP/1.1 200 OK Date: Wed, 02 Jan 2008 03:03:54 GMT @@ -495,13 +507,14 @@ def test_short_content_raises_ContentTooShortError(self): def _reporthook(par1, par2, par3): pass - with self.assertRaises(urllib.error.ContentTooShortError): + with self.assertRaises(urllib_error.ContentTooShortError): try: - urllib.request.urlretrieve('http://example.com/', + urllib_request.urlretrieve('http://example.com/', reporthook=_reporthook) finally: self.unfakehttp() + @skip26 def test_short_content_raises_ContentTooShortError_without_reporthook(self): self.fakehttp(b'''HTTP/1.1 200 OK Date: Wed, 02 Jan 2008 03:03:54 GMT @@ -512,9 +525,9 @@ def test_short_content_raises_ContentTooShortError_without_reporthook(self): FF ''') - with self.assertRaises(urllib.error.ContentTooShortError): + with self.assertRaises(urllib_error.ContentTooShortError): try: - urllib.request.urlretrieve('http://example.com/') + urllib_request.urlretrieve('http://example.com/') finally: self.unfakehttp() @@ -551,42 +564,42 @@ def test_never_quote(self): "abcdefghijklmnopqrstuvwxyz", "0123456789", "_.-"]) - result = urllib.parse.quote(do_not_quote) + result = urllib_parse.quote(do_not_quote) self.assertEqual(do_not_quote, result, "using quote(): %r != %r" % (do_not_quote, result)) - result = urllib.parse.quote_plus(do_not_quote) + result = urllib_parse.quote_plus(do_not_quote) self.assertEqual(do_not_quote, result, "using quote_plus(): %r != %r" % (do_not_quote, result)) def test_default_safe(self): # Test '/' is default value for 'safe' parameter - 
self.assertEqual(urllib.parse.quote.__defaults__[0], '/') + self.assertEqual(urllib_parse.quote.__defaults__[0], '/') def test_safe(self): # Test setting 'safe' parameter does what it should do quote_by_default = "<>" - result = urllib.parse.quote(quote_by_default, safe=quote_by_default) + result = urllib_parse.quote(quote_by_default, safe=quote_by_default) self.assertEqual(quote_by_default, result, "using quote(): %r != %r" % (quote_by_default, result)) - result = urllib.parse.quote_plus(quote_by_default, + result = urllib_parse.quote_plus(quote_by_default, safe=quote_by_default) self.assertEqual(quote_by_default, result, "using quote_plus(): %r != %r" % (quote_by_default, result)) # Safe expressed as bytes rather than str - result = urllib.parse.quote(quote_by_default, safe=b"<>") + result = urllib_parse.quote(quote_by_default, safe=b"<>") self.assertEqual(quote_by_default, result, "using quote(): %r != %r" % (quote_by_default, result)) # "Safe" non-ASCII characters should have no effect # (Since URIs are not allowed to have non-ASCII characters) - result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc") - expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="") + result = urllib_parse.quote("a\xfcb", encoding="latin-1", safe="\xfc") + expect = urllib_parse.quote("a\xfcb", encoding="latin-1", safe="") self.assertEqual(expect, result, "using quote(): %r != %r" % (expect, result)) # Same as above, but using a bytes rather than str - result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc") - expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="") + result = urllib_parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc") + expect = urllib_parse.quote("a\xfcb", encoding="latin-1", safe="") self.assertEqual(expect, result, "using quote(): %r != %r" % (expect, result)) @@ -599,12 +612,12 @@ def test_default_quoting(self): should_quote.append(chr(127)) # For 0x7F should_quote = ''.join(should_quote) for char in 
should_quote: - result = urllib.parse.quote(char) + result = urllib_parse.quote(char) self.assertEqual(hexescape(char), result, "using quote(): " "%s should be escaped to %s, not %s" % (char, hexescape(char), result)) - result = urllib.parse.quote_plus(char) + result = urllib_parse.quote_plus(char) self.assertEqual(hexescape(char), result, "using quote_plus(): " "%s should be escapes to %s, not %s" % @@ -612,56 +625,56 @@ def test_default_quoting(self): del should_quote partial_quote = "ab[]cd" expected = "ab%5B%5Dcd" - result = urllib.parse.quote(partial_quote) + result = urllib_parse.quote(partial_quote) self.assertEqual(expected, result, "using quote(): %r != %r" % (expected, result)) - result = urllib.parse.quote_plus(partial_quote) + result = urllib_parse.quote_plus(partial_quote) self.assertEqual(expected, result, "using quote_plus(): %r != %r" % (expected, result)) def test_quoting_space(self): # Make sure quote() and quote_plus() handle spaces as specified in # their unique way - result = urllib.parse.quote(' ') + result = urllib_parse.quote(' ') self.assertEqual(result, hexescape(' '), "using quote(): %r != %r" % (result, hexescape(' '))) - result = urllib.parse.quote_plus(' ') + result = urllib_parse.quote_plus(' ') self.assertEqual(result, '+', "using quote_plus(): %r != +" % result) given = "a b cd e f" expect = given.replace(' ', hexescape(' ')) - result = urllib.parse.quote(given) + result = urllib_parse.quote(given) self.assertEqual(expect, result, "using quote(): %r != %r" % (expect, result)) expect = given.replace(' ', '+') - result = urllib.parse.quote_plus(given) + result = urllib_parse.quote_plus(given) self.assertEqual(expect, result, "using quote_plus(): %r != %r" % (expect, result)) def test_quoting_plus(self): - self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'), + self.assertEqual(urllib_parse.quote_plus('alpha+beta gamma'), 'alpha%2Bbeta+gamma') - self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'), + 
self.assertEqual(urllib_parse.quote_plus('alpha+beta gamma', '+'), 'alpha+beta+gamma') # Test with bytes - self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'), + self.assertEqual(urllib_parse.quote_plus(b'alpha+beta gamma'), 'alpha%2Bbeta+gamma') # Test with safe bytes - self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'), + self.assertEqual(urllib_parse.quote_plus('alpha+beta gamma', b'+'), 'alpha+beta+gamma') def test_quote_bytes(self): # Bytes should quote directly to percent-encoded values given = b"\xa2\xd8ab\xff" expect = "%A2%D8ab%FF" - result = urllib.parse.quote(given) + result = urllib_parse.quote(given) self.assertEqual(expect, result, "using quote(): %r != %r" % (expect, result)) # Encoding argument should raise type error on bytes input - self.assertRaises(TypeError, urllib.parse.quote, given, + self.assertRaises(TypeError, urllib_parse.quote, given, encoding="latin-1") # quote_from_bytes should work the same - result = urllib.parse.quote_from_bytes(given) + result = urllib_parse.quote_from_bytes(given) self.assertEqual(expect, result, "using quote_from_bytes(): %r != %r" % (expect, result)) @@ -670,40 +683,40 @@ def test_quote_with_unicode(self): # Characters in Latin-1 range, encoded by default in UTF-8 given = "\xa2\xd8ab\xff" expect = "%C2%A2%C3%98ab%C3%BF" - result = urllib.parse.quote(given) + result = urllib_parse.quote(given) self.assertEqual(expect, result, "using quote(): %r != %r" % (expect, result)) # Characters in Latin-1 range, encoded by with None (default) - result = urllib.parse.quote(given, encoding=None, errors=None) + result = urllib_parse.quote(given, encoding=None, errors=None) self.assertEqual(expect, result, "using quote(): %r != %r" % (expect, result)) # Characters in Latin-1 range, encoded with Latin-1 given = "\xa2\xd8ab\xff" expect = "%A2%D8ab%FF" - result = urllib.parse.quote(given, encoding="latin-1") + result = urllib_parse.quote(given, encoding="latin-1") self.assertEqual(expect, result, "using 
quote(): %r != %r" % (expect, result)) # Characters in BMP, encoded by default in UTF-8 given = "\u6f22\u5b57" # "Kanji" expect = "%E6%BC%A2%E5%AD%97" - result = urllib.parse.quote(given) + result = urllib_parse.quote(given) self.assertEqual(expect, result, "using quote(): %r != %r" % (expect, result)) # Characters in BMP, encoded with Latin-1 given = "\u6f22\u5b57" - self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given, + self.assertRaises(UnicodeEncodeError, urllib_parse.quote, given, encoding="latin-1") # Characters in BMP, encoded with Latin-1, with replace error handling given = "\u6f22\u5b57" expect = "%3F%3F" # "??" - result = urllib.parse.quote(given, encoding="latin-1", + result = urllib_parse.quote(given, encoding="latin-1", errors="replace") self.assertEqual(expect, result, "using quote(): %r != %r" % (expect, result)) # Characters in BMP, Latin-1, with xmlcharref error handling given = "\u6f22\u5b57" expect = "%26%2328450%3B%26%2323383%3B" # "漢字" - result = urllib.parse.quote(given, encoding="latin-1", + result = urllib_parse.quote(given, encoding="latin-1", errors="xmlcharrefreplace") self.assertEqual(expect, result, "using quote(): %r != %r" % (expect, result)) @@ -712,13 +725,13 @@ def test_quote_plus_with_unicode(self): # Encoding (latin-1) test for quote_plus given = "\xa2\xd8 \xff" expect = "%A2%D8+%FF" - result = urllib.parse.quote_plus(given, encoding="latin-1") + result = urllib_parse.quote_plus(given, encoding="latin-1") self.assertEqual(expect, result, "using quote_plus(): %r != %r" % (expect, result)) # Errors test for quote_plus given = "ab\u6f22\u5b57 cd" expect = "ab%3F%3F+cd" - result = urllib.parse.quote_plus(given, encoding="latin-1", + result = urllib_parse.quote_plus(given, encoding="latin-1", errors="replace") self.assertEqual(expect, result, "using quote_plus(): %r != %r" % (expect, result)) @@ -737,66 +750,66 @@ def test_unquoting(self): for num in range(128): given = hexescape(chr(num)) expect = chr(num) - result = 
urllib.parse.unquote(given) + result = urllib_parse.unquote(given) self.assertEqual(expect, result, "using unquote(): %r != %r" % (expect, result)) - result = urllib.parse.unquote_plus(given) + result = urllib_parse.unquote_plus(given) self.assertEqual(expect, result, "using unquote_plus(): %r != %r" % (expect, result)) escape_list.append(given) escape_string = ''.join(escape_list) del escape_list - result = urllib.parse.unquote(escape_string) + result = urllib_parse.unquote(escape_string) self.assertEqual(result.count('%'), 1, "using unquote(): not all characters escaped: " "%s" % result) - self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None) - self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ()) + self.assertRaises((TypeError, AttributeError), urllib_parse.unquote, None) + self.assertRaises((TypeError, AttributeError), urllib_parse.unquote, ()) with support.check_warnings(('', BytesWarning), quiet=True): - self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'') + self.assertRaises((TypeError, AttributeError), urllib_parse.unquote, bytes(b'')) def test_unquoting_badpercent(self): # Test unquoting on bad percent-escapes given = '%xab' expect = given - result = urllib.parse.unquote(given) + result = urllib_parse.unquote(given) self.assertEqual(expect, result, "using unquote(): %r != %r" % (expect, result)) given = '%x' expect = given - result = urllib.parse.unquote(given) + result = urllib_parse.unquote(given) self.assertEqual(expect, result, "using unquote(): %r != %r" % (expect, result)) given = '%' expect = given - result = urllib.parse.unquote(given) + result = urllib_parse.unquote(given) self.assertEqual(expect, result, "using unquote(): %r != %r" % (expect, result)) # unquote_to_bytes given = '%xab' expect = bytes(given, 'ascii') - result = urllib.parse.unquote_to_bytes(given) + result = urllib_parse.unquote_to_bytes(given) self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" % 
(expect, result)) given = '%x' expect = bytes(given, 'ascii') - result = urllib.parse.unquote_to_bytes(given) + result = urllib_parse.unquote_to_bytes(given) self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" % (expect, result)) given = '%' expect = bytes(given, 'ascii') - result = urllib.parse.unquote_to_bytes(given) + result = urllib_parse.unquote_to_bytes(given) self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" % (expect, result)) - self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None) - self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ()) + self.assertRaises((TypeError, AttributeError), urllib_parse.unquote_to_bytes, None) + self.assertRaises((TypeError, AttributeError), urllib_parse.unquote_to_bytes, ()) def test_unquoting_mixed_case(self): # Test unquoting on mixed-case hex digits in the percent-escapes given = '%Ab%eA' expect = b'\xab\xea' - result = urllib.parse.unquote_to_bytes(given) + result = urllib_parse.unquote_to_bytes(given) self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" % (expect, result)) @@ -806,10 +819,10 @@ def test_unquoting_parts(self): # interspersed given = 'ab%sd' % hexescape('c') expect = "abcd" - result = urllib.parse.unquote(given) + result = urllib_parse.unquote(given) self.assertEqual(expect, result, "using quote(): %r != %r" % (expect, result)) - result = urllib.parse.unquote_plus(given) + result = urllib_parse.unquote_plus(given) self.assertEqual(expect, result, "using unquote_plus(): %r != %r" % (expect, result)) @@ -817,25 +830,25 @@ def test_unquoting_plus(self): # Test difference between unquote() and unquote_plus() given = "are+there+spaces..." 
expect = given - result = urllib.parse.unquote(given) + result = urllib_parse.unquote(given) self.assertEqual(expect, result, "using unquote(): %r != %r" % (expect, result)) expect = given.replace('+', ' ') - result = urllib.parse.unquote_plus(given) + result = urllib_parse.unquote_plus(given) self.assertEqual(expect, result, "using unquote_plus(): %r != %r" % (expect, result)) def test_unquote_to_bytes(self): given = 'br%C3%BCckner_sapporo_20050930.doc' expect = b'br\xc3\xbcckner_sapporo_20050930.doc' - result = urllib.parse.unquote_to_bytes(given) + result = urllib_parse.unquote_to_bytes(given) self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" % (expect, result)) # Test on a string with unescaped non-ASCII characters # (Technically an invalid URI; expect those characters to be UTF-8 # encoded). - result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC") + result = urllib_parse.unquote_to_bytes("\u6f22%C3%BC") expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc" self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" @@ -843,7 +856,7 @@ def test_unquote_to_bytes(self): # Test with a bytes as input given = b'%A2%D8ab%FF' expect = b'\xa2\xd8ab\xff' - result = urllib.parse.unquote_to_bytes(given) + result = urllib_parse.unquote_to_bytes(given) self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" % (expect, result)) @@ -851,7 +864,7 @@ def test_unquote_to_bytes(self): # (Technically an invalid URI; expect those bytes to be preserved) given = b'%A2\xd8ab%FF' expect = b'\xa2\xd8ab\xff' - result = urllib.parse.unquote_to_bytes(given) + result = urllib_parse.unquote_to_bytes(given) self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" % (expect, result)) @@ -860,16 +873,16 @@ def test_unquote_with_unicode(self): # Characters in the Latin-1 range, encoded with UTF-8 given = 'br%C3%BCckner_sapporo_20050930.doc' expect = 'br\u00fcckner_sapporo_20050930.doc' - result = urllib.parse.unquote(given) + result 
= urllib_parse.unquote(given) self.assertEqual(expect, result, "using unquote(): %r != %r" % (expect, result)) # Characters in the Latin-1 range, encoded with None (default) - result = urllib.parse.unquote(given, encoding=None, errors=None) + result = urllib_parse.unquote(given, encoding=None, errors=None) self.assertEqual(expect, result, "using unquote(): %r != %r" % (expect, result)) # Characters in the Latin-1 range, encoded with Latin-1 - result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc', + result = urllib_parse.unquote('br%FCckner_sapporo_20050930.doc', encoding="latin-1") expect = 'br\u00fcckner_sapporo_20050930.doc' self.assertEqual(expect, result, @@ -878,38 +891,38 @@ def test_unquote_with_unicode(self): # Characters in BMP, encoded with UTF-8 given = "%E6%BC%A2%E5%AD%97" expect = "\u6f22\u5b57" # "Kanji" - result = urllib.parse.unquote(given) + result = urllib_parse.unquote(given) self.assertEqual(expect, result, "using unquote(): %r != %r" % (expect, result)) # Decode with UTF-8, invalid sequence given = "%F3%B1" expect = "\ufffd" # Replacement character - result = urllib.parse.unquote(given) + result = urllib_parse.unquote(given) self.assertEqual(expect, result, "using unquote(): %r != %r" % (expect, result)) # Decode with UTF-8, invalid sequence, replace errors - result = urllib.parse.unquote(given, errors="replace") + result = urllib_parse.unquote(given, errors="replace") self.assertEqual(expect, result, "using unquote(): %r != %r" % (expect, result)) # Decode with UTF-8, invalid sequence, ignoring errors given = "%F3%B1" expect = "" - result = urllib.parse.unquote(given, errors="ignore") + result = urllib_parse.unquote(given, errors="ignore") self.assertEqual(expect, result, "using unquote(): %r != %r" % (expect, result)) # A mix of non-ASCII and percent-encoded characters, UTF-8 - result = urllib.parse.unquote("\u6f22%C3%BC") + result = urllib_parse.unquote("\u6f22%C3%BC") expect = '\u6f22\u00fc' self.assertEqual(expect, result, "using 
unquote(): %r != %r" % (expect, result)) # A mix of non-ASCII and percent-encoded characters, Latin-1 # (Note, the string contains non-Latin-1-representable characters) - result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1") + result = urllib_parse.unquote("\u6f22%FC", encoding="latin-1") expect = '\u6f22\u00fc' self.assertEqual(expect, result, "using unquote(): %r != %r" % (expect, result)) @@ -930,7 +943,7 @@ def help_inputtype(self, given, test_type): """ expect_somewhere = ["1st=1", "2nd=2", "3rd=3"] - result = urllib.parse.urlencode(given) + result = urllib_parse.urlencode(given) for expected in expect_somewhere: self.assertIn(expected, result, "testing %s: %s not found in %s" % @@ -963,20 +976,20 @@ def test_quoting(self): # Make sure keys and values are quoted using quote_plus() given = {"&":"="} expect = "%s=%s" % (hexescape('&'), hexescape('=')) - result = urllib.parse.urlencode(given) + result = urllib_parse.urlencode(given) self.assertEqual(expect, result) given = {"key name":"A bunch of pluses"} expect = "key+name=A+bunch+of+pluses" - result = urllib.parse.urlencode(given) + result = urllib_parse.urlencode(given) self.assertEqual(expect, result) def test_doseq(self): # Test that passing True for 'doseq' parameter works correctly given = {'sequence':['1', '2', '3']} - expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3'])) - result = urllib.parse.urlencode(given) + expect = "sequence=%s" % urllib_parse.quote_plus(str(['1', '2', '3'])) + result = urllib_parse.urlencode(given) self.assertEqual(expect, result) - result = urllib.parse.urlencode(given, True) + result = urllib_parse.urlencode(given, True) for value in given["sequence"]: expect = "sequence=%s" % value self.assertIn(expect, result) @@ -984,89 +997,90 @@ def test_doseq(self): "Expected 2 '&'s, got %s" % result.count('&')) def test_empty_sequence(self): - self.assertEqual("", urllib.parse.urlencode({})) - self.assertEqual("", urllib.parse.urlencode([])) + 
self.assertEqual("", urllib_parse.urlencode({})) + self.assertEqual("", urllib_parse.urlencode([])) def test_nonstring_values(self): - self.assertEqual("a=1", urllib.parse.urlencode({"a": 1})) - self.assertEqual("a=None", urllib.parse.urlencode({"a": None})) + self.assertEqual("a=1", urllib_parse.urlencode({"a": 1})) + self.assertEqual("a=None", urllib_parse.urlencode({"a": None})) def test_nonstring_seq_values(self): - self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True)) + from future.backports import OrderedDict + self.assertEqual("a=1&a=2", urllib_parse.urlencode({"a": [1, 2]}, True)) self.assertEqual("a=None&a=a", - urllib.parse.urlencode({"a": [None, "a"]}, True)) - data = collections.OrderedDict([("a", 1), ("b", 1)]) + urllib_parse.urlencode({"a": [None, "a"]}, True)) + data = OrderedDict([("a", 1), ("b", 1)]) self.assertEqual("a=a&a=b", - urllib.parse.urlencode({"a": data}, True)) + urllib_parse.urlencode({"a": data}, True)) def test_urlencode_encoding(self): # ASCII encoding. Expect %3F with errors="replace' given = (('\u00a0', '\u00c1'),) expect = '%3F=%3F' - result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace") + result = urllib_parse.urlencode(given, encoding="ASCII", errors="replace") self.assertEqual(expect, result) # Default is UTF-8 encoding. given = (('\u00a0', '\u00c1'),) expect = '%C2%A0=%C3%81' - result = urllib.parse.urlencode(given) + result = urllib_parse.urlencode(given) self.assertEqual(expect, result) # Latin-1 encoding. given = (('\u00a0', '\u00c1'),) expect = '%A0=%C1' - result = urllib.parse.urlencode(given, encoding="latin-1") + result = urllib_parse.urlencode(given, encoding="latin-1") self.assertEqual(expect, result) def test_urlencode_encoding_doseq(self): # ASCII Encoding. 
Expect %3F with errors="replace' given = (('\u00a0', '\u00c1'),) expect = '%3F=%3F' - result = urllib.parse.urlencode(given, doseq=True, + result = urllib_parse.urlencode(given, doseq=True, encoding="ASCII", errors="replace") self.assertEqual(expect, result) # ASCII Encoding. On a sequence of values. given = (("\u00a0", (1, "\u00c1")),) expect = '%3F=1&%3F=%3F' - result = urllib.parse.urlencode(given, True, + result = urllib_parse.urlencode(given, True, encoding="ASCII", errors="replace") self.assertEqual(expect, result) # Utf-8 given = (("\u00a0", "\u00c1"),) expect = '%C2%A0=%C3%81' - result = urllib.parse.urlencode(given, True) + result = urllib_parse.urlencode(given, True) self.assertEqual(expect, result) given = (("\u00a0", (42, "\u00c1")),) expect = '%C2%A0=42&%C2%A0=%C3%81' - result = urllib.parse.urlencode(given, True) + result = urllib_parse.urlencode(given, True) self.assertEqual(expect, result) # latin-1 given = (("\u00a0", "\u00c1"),) expect = '%A0=%C1' - result = urllib.parse.urlencode(given, True, encoding="latin-1") + result = urllib_parse.urlencode(given, True, encoding="latin-1") self.assertEqual(expect, result) given = (("\u00a0", (42, "\u00c1")),) expect = '%A0=42&%A0=%C1' - result = urllib.parse.urlencode(given, True, encoding="latin-1") + result = urllib_parse.urlencode(given, True, encoding="latin-1") self.assertEqual(expect, result) def test_urlencode_bytes(self): given = ((b'\xa0\x24', b'\xc1\x24'),) expect = '%A0%24=%C1%24' - result = urllib.parse.urlencode(given) + result = urllib_parse.urlencode(given) self.assertEqual(expect, result) - result = urllib.parse.urlencode(given, True) + result = urllib_parse.urlencode(given, True) self.assertEqual(expect, result) # Sequence of values given = ((b'\xa0\x24', (42, b'\xc1\x24')),) expect = '%A0%24=42&%A0%24=%C1%24' - result = urllib.parse.urlencode(given, True) + result = urllib_parse.urlencode(given, True) self.assertEqual(expect, result) def test_urlencode_encoding_safe_parameter(self): @@ 
-1075,37 +1089,37 @@ def test_urlencode_encoding_safe_parameter(self): # Default utf-8 encoding given = ((b'\xa0\x24', b'\xc1\x24'),) - result = urllib.parse.urlencode(given, safe=":$") + result = urllib_parse.urlencode(given, safe=":$") expect = '%A0$=%C1$' self.assertEqual(expect, result) given = ((b'\xa0\x24', b'\xc1\x24'),) - result = urllib.parse.urlencode(given, doseq=True, safe=":$") + result = urllib_parse.urlencode(given, doseq=True, safe=":$") expect = '%A0$=%C1$' self.assertEqual(expect, result) # Safe parameter in sequence given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),) expect = '%A0$=%C1$&%A0$=13&%A0$=42' - result = urllib.parse.urlencode(given, True, safe=":$") + result = urllib_parse.urlencode(given, True, safe=":$") self.assertEqual(expect, result) # Test all above in latin-1 encoding given = ((b'\xa0\x24', b'\xc1\x24'),) - result = urllib.parse.urlencode(given, safe=":$", + result = urllib_parse.urlencode(given, safe=":$", encoding="latin-1") expect = '%A0$=%C1$' self.assertEqual(expect, result) given = ((b'\xa0\x24', b'\xc1\x24'),) expect = '%A0$=%C1$' - result = urllib.parse.urlencode(given, doseq=True, safe=":$", + result = urllib_parse.urlencode(given, doseq=True, safe=":$", encoding="latin-1") given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),) expect = '%A0$=%C1$&%A0$=13&%A0$=42' - result = urllib.parse.urlencode(given, True, safe=":$", + result = urllib_parse.urlencode(given, True, safe=":$", encoding="latin-1") self.assertEqual(expect, result) @@ -1116,11 +1130,11 @@ def test_basic(self): # Make sure simple tests pass expected_path = os.path.join("parts", "of", "a", "path") expected_url = "parts/of/a/path" - result = urllib.request.pathname2url(expected_path) + result = urllib_request.pathname2url(expected_path) self.assertEqual(expected_url, result, "pathname2url() failed; %s != %s" % (result, expected_url)) - result = urllib.request.url2pathname(expected_url) + result = urllib_request.url2pathname(expected_url) 
self.assertEqual(expected_path, result, "url2pathame() failed; %s != %s" % (result, expected_path)) @@ -1129,25 +1143,25 @@ def test_quoting(self): # Test automatic quoting and unquoting works for pathnam2url() and # url2pathname() respectively given = os.path.join("needs", "quot=ing", "here") - expect = "needs/%s/here" % urllib.parse.quote("quot=ing") - result = urllib.request.pathname2url(given) + expect = "needs/%s/here" % urllib_parse.quote("quot=ing") + result = urllib_request.pathname2url(given) self.assertEqual(expect, result, "pathname2url() failed; %s != %s" % (expect, result)) expect = given - result = urllib.request.url2pathname(result) + result = urllib_request.url2pathname(result) self.assertEqual(expect, result, "url2pathname() failed; %s != %s" % (expect, result)) given = os.path.join("make sure", "using_quote") - expect = "%s/using_quote" % urllib.parse.quote("make sure") - result = urllib.request.pathname2url(given) + expect = "%s/using_quote" % urllib_parse.quote("make sure") + result = urllib_request.pathname2url(given) self.assertEqual(expect, result, "pathname2url() failed; %s != %s" % (expect, result)) given = "make+sure/using_unquote" expect = os.path.join("make+sure", "using_unquote") - result = urllib.request.url2pathname(given) + result = urllib_request.url2pathname(given) self.assertEqual(expect, result, "url2pathname() failed; %s != %s" % (expect, result)) @@ -1158,15 +1172,15 @@ def test_ntpath(self): given = ('/C:/', '///C:/', '/C|//') expect = 'C:\\' for url in given: - result = urllib.request.url2pathname(url) + result = urllib_request.url2pathname(url) self.assertEqual(expect, result, - 'urllib.request..url2pathname() failed; %s != %s' % + 'urllib_request..url2pathname() failed; %s != %s' % (expect, result)) given = '///C|/path' expect = 'C:\\path' - result = urllib.request.url2pathname(given) + result = urllib_request.url2pathname(given) self.assertEqual(expect, result, - 'urllib.request.url2pathname() failed; %s != %s' % + 
'urllib_request.url2pathname() failed; %s != %s' % (expect, result)) class Utility_Tests(unittest.TestCase): @@ -1176,27 +1190,27 @@ def test_splitpasswd(self): """Some of password examples are not sensible, but it is added to confirming to RFC2617 and addressing issue4675. """ - self.assertEqual(('user', 'ab'),urllib.parse.splitpasswd('user:ab')) - self.assertEqual(('user', 'a\nb'),urllib.parse.splitpasswd('user:a\nb')) - self.assertEqual(('user', 'a\tb'),urllib.parse.splitpasswd('user:a\tb')) - self.assertEqual(('user', 'a\rb'),urllib.parse.splitpasswd('user:a\rb')) - self.assertEqual(('user', 'a\fb'),urllib.parse.splitpasswd('user:a\fb')) - self.assertEqual(('user', 'a\vb'),urllib.parse.splitpasswd('user:a\vb')) - self.assertEqual(('user', 'a:b'),urllib.parse.splitpasswd('user:a:b')) - self.assertEqual(('user', 'a b'),urllib.parse.splitpasswd('user:a b')) - self.assertEqual(('user 2', 'ab'),urllib.parse.splitpasswd('user 2:ab')) - self.assertEqual(('user+1', 'a+b'),urllib.parse.splitpasswd('user+1:a+b')) + self.assertEqual(('user', 'ab'),urllib_parse.splitpasswd('user:ab')) + self.assertEqual(('user', 'a\nb'),urllib_parse.splitpasswd('user:a\nb')) + self.assertEqual(('user', 'a\tb'),urllib_parse.splitpasswd('user:a\tb')) + self.assertEqual(('user', 'a\rb'),urllib_parse.splitpasswd('user:a\rb')) + self.assertEqual(('user', 'a\fb'),urllib_parse.splitpasswd('user:a\fb')) + self.assertEqual(('user', 'a\vb'),urllib_parse.splitpasswd('user:a\vb')) + self.assertEqual(('user', 'a:b'),urllib_parse.splitpasswd('user:a:b')) + self.assertEqual(('user', 'a b'),urllib_parse.splitpasswd('user:a b')) + self.assertEqual(('user 2', 'ab'),urllib_parse.splitpasswd('user 2:ab')) + self.assertEqual(('user+1', 'a+b'),urllib_parse.splitpasswd('user+1:a+b')) def test_thishost(self): - """Test the urllib.request.thishost utility function returns a tuple""" - self.assertIsInstance(urllib.request.thishost(), tuple) + """Test the urllib_request.thishost utility function returns a tuple""" + 
self.assertIsInstance(urllib_request.thishost(), tuple) class URLopener_Tests(unittest.TestCase): """Testcase to test the open method of URLopener class.""" def test_quoted_open(self): - class DummyURLopener(urllib.request.URLopener): + class DummyURLopener(urllib_request.URLopener): def open_spam(self, url): return url with support.check_warnings( @@ -1215,7 +1229,7 @@ def open_spam(self, url): # Everywhere else they work ok, but on those machines, sometimes # fail in one of the tests, sometimes in other. I have a linux, and # the tests go ok. -# If anybody has one of the problematic enviroments, please help! +# If anybody has one of the problematic environments, please help! # . Facundo # # def server(evt): @@ -1289,17 +1303,17 @@ def open_spam(self, url): # ftp.close() class RequestTests(unittest.TestCase): - """Unit tests for urllib.request.Request.""" + """Unit tests for urllib_request.Request.""" def test_default_values(self): - Request = urllib.request.Request + Request = urllib_request.Request request = Request("http://www.python.org") self.assertEqual(request.get_method(), 'GET') request = Request("http://www.python.org", {}) self.assertEqual(request.get_method(), 'POST') def test_with_method_arg(self): - Request = urllib.request.Request + Request = urllib_request.Request request = Request("http://www.python.org", method='HEAD') self.assertEqual(request.method, 'HEAD') self.assertEqual(request.get_method(), 'HEAD') @@ -1312,24 +1326,61 @@ def test_with_method_arg(self): self.assertEqual(request.get_method(), 'HEAD') -def test_main(): - support.run_unittest( - urlopen_FileTests, - urlopen_HttpTests, - urlretrieve_FileTests, - urlretrieve_HttpTests, - ProxyTests, - QuotingTests, - UnquotingTests, - urlencode_Tests, - Pathname_Tests, - Utility_Tests, - URLopener_Tests, - #FTPWrapperTests, - RequestTests, - ) +class URL2PathNameTests(unittest.TestCase): + + @expectedFailurePY26 + def test_converting_drive_letter(self): + self.assertEqual(url2pathname("///C|"), 
'C:') + self.assertEqual(url2pathname("///C:"), 'C:') + self.assertEqual(url2pathname("///C|/"), 'C:\\') + + def test_converting_when_no_drive_letter(self): + # cannot end a raw string in \ + self.assertEqual(url2pathname("///C/test/"), r'\\\C\test' '\\') + self.assertEqual(url2pathname("////C/test/"), r'\\C\test' '\\') + + def test_simple_compare(self): + self.assertEqual(url2pathname("///C|/foo/bar/spam.foo"), + r'C:\foo\bar\spam.foo') + + def test_non_ascii_drive_letter(self): + self.assertRaises(IOError, url2pathname, "///\u00e8|/") + + def test_roundtrip_url2pathname(self): + list_of_paths = ['C:', + r'\\\C\test\\', + r'C:\foo\bar\spam.foo' + ] + for path in list_of_paths: + self.assertEqual(url2pathname(pathname2url(path)), path) + +class PathName2URLTests(unittest.TestCase): + + def test_converting_drive_letter(self): + self.assertEqual(pathname2url("C:"), '///C:') + self.assertEqual(pathname2url("C:\\"), '///C:') + + def test_converting_when_no_drive_letter(self): + self.assertEqual(pathname2url(r"\\\folder\test" "\\"), + '/////folder/test/') + self.assertEqual(pathname2url(r"\\folder\test" "\\"), + '////folder/test/') + self.assertEqual(pathname2url(r"\folder\test" "\\"), + '/folder/test/') + + def test_simple_compare(self): + self.assertEqual(pathname2url(r'C:\foo\bar\spam.foo'), + "///C:/foo/bar/spam.foo" ) + def test_long_drive_letter(self): + self.assertRaises(IOError, pathname2url, "XX:\\") + def test_roundtrip_pathname2url(self): + list_of_paths = ['///C:', + '/////folder/test/', + '///C:/foo/bar/spam.foo'] + for path in list_of_paths: + self.assertEqual(pathname2url(url2pathname(path)), path) if __name__ == '__main__': - test_main() + unittest.main() diff --git a/tests/test_future/test_urllib2.py b/tests/test_future/test_urllib2.py new file mode 100644 index 00000000..bd8e75c5 --- /dev/null +++ b/tests/test_future/test_urllib2.py @@ -0,0 +1,1568 @@ +from __future__ import absolute_import, division, unicode_literals +import os +import io +import 
socket +import array +import sys + +import http.client +from future.standard_library import install_aliases +from future.backports.test import support +import future.backports.urllib.request as urllib_request +# The proxy bypass method imported below has logic specific to the OSX +# proxy config data structure but is testable on all platforms. +from future.backports.urllib.request import Request, OpenerDirector, _proxy_bypass_macosx_sysconf +import future.backports.urllib.error as urllib_error +from future.tests.base import unittest, skip26 +from future.builtins import bytes, dict, int, open, str, zip +from future.utils import text_to_native_str + +install_aliases() # for base64.encodebytes on Py2 + +# from future.tests.test_http_cookiejar import interact_netscape + +class FakeResponse(object): + def __init__(self, headers=[], url=None): + """ + headers: list of RFC822-style 'Key: value' strings + """ + import email + # The email.message_from_string is available on both Py2.7 and Py3.3 + self._headers = email.message_from_string("\n".join(headers)) + self._url = url + def info(self): return self._headers + + +def interact_netscape(cookiejar, url, *set_cookie_hdrs): + return _interact(cookiejar, url, set_cookie_hdrs, "Set-Cookie") + +def _interact(cookiejar, url, set_cookie_hdrs, hdr_name): + """Perform a single request / response cycle, returning Cookie: header.""" + req = urllib_request.Request(url) + cookiejar.add_cookie_header(req) + cookie_hdr = req.get_header("Cookie", "") + headers = [] + for hdr in set_cookie_hdrs: + headers.append("%s: %s" % (hdr_name, hdr)) + res = FakeResponse(headers, url) + cookiejar.extract_cookies(res, req) + return cookie_hdr + + +# XXX +# Request +# CacheFTPHandler (hard to write) +# parse_keqv_list, parse_http_list, HTTPDigestAuthHandler + +class TrivialTests(unittest.TestCase): + + def test___all__(self): + # Verify which names are exposed + for module in 'request', 'response', 'parse', 'error', 'robotparser': + context = {} + 
exec('from future.backports.urllib.%s import *' % module, context) + del context['__builtins__'] + if module == 'request' and os.name == 'nt': + u, p = context.pop('url2pathname'), context.pop('pathname2url') + self.assertEqual(u.__module__, 'nturl2path') + self.assertEqual(p.__module__, 'nturl2path') + for k, v in context.items(): + self.assertEqual(v.__module__, 'future.backports.urllib.%s' % module, + "%r is exposed in 'future.backports.urllib.%s' but defined in %r" % + (k, module, v.__module__)) + + def test_trivial(self): + # A couple trivial tests + + self.assertRaises(ValueError, urllib_request.urlopen, 'bogus url') + + # XXX Name hacking to get this to work on Windows. + fname = os.path.abspath(urllib_request.__file__).replace('\\', '/') + + if os.name == 'nt': + file_url = "file:///%s" % fname + else: + file_url = "file://%s" % fname + + f = urllib_request.urlopen(file_url) + + f.read() + f.close() + + def test_parse_http_list(self): + tests = [ + ('a,b,c', ['a', 'b', 'c']), + ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']), + ('a, b, "c", "d", "e,f", g, h', + ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']), + ('a="b\\"c", d="e\\,f", g="h\\\\i"', + ['a="b"c"', 'd="e,f"', 'g="h\\i"'])] + for string, list in tests: + self.assertEqual(urllib_request.parse_http_list(string), list) + + def test_URLError_reasonstr(self): + err = urllib_error.URLError('reason') + self.assertIn(err.reason, str(err)) + +class RequestHdrsTests(unittest.TestCase): + + def test_request_headers_dict(self): + """ + The Request.headers dictionary is not a documented interface. It + should stay that way, because the complete set of headers are only + accessible through the .get_header(), .has_header(), .header_items() + interface. However, .headers pre-dates those methods, and so real code + will be using the dictionary. 
+ + The introduction in 2.4 of those methods was a mistake for the same + reason: code that previously saw all (urllib2 user)-provided headers in + .headers now sees only a subset. + + """ + url = "http://example.com" + self.assertEqual(Request(url, + headers={"Spam-eggs": "blah"} + ).headers["Spam-eggs"], "blah") + self.assertEqual(Request(url, + headers={"spam-EggS": "blah"} + ).headers["Spam-eggs"], "blah") + + def test_request_headers_methods(self): + """ + Note the case normalization of header names here, to + .capitalize()-case. This should be preserved for + backwards-compatibility. (In the HTTP case, normalization to + .title()-case is done by urllib2 before sending headers to + http.client). + + Note that e.g. r.has_header("spam-EggS") is currently False, and + r.get_header("spam-EggS") returns None, but that could be changed in + future. + + Method r.remove_header should remove items both from r.headers and + r.unredirected_hdrs dictionaries + """ + url = "http://example.com" + req = Request(url, headers={"Spam-eggs": "blah"}) + self.assertTrue(req.has_header("Spam-eggs")) + self.assertEqual(req.header_items(), [('Spam-eggs', 'blah')]) + + req.add_header("Foo-Bar", "baz") + self.assertEqual(sorted(req.header_items()), + [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]) + self.assertFalse(req.has_header("Not-there")) + self.assertIsNone(req.get_header("Not-there")) + self.assertEqual(req.get_header("Not-there", "default"), "default") + + + def test_password_manager(self): + mgr = urllib_request.HTTPPasswordMgr() + add = mgr.add_password + find_user_pass = mgr.find_user_password + add("Some Realm", "http://example.com/", "joe", "password") + add("Some Realm", "http://example.com/ni", "ni", "ni") + add("c", "http://example.com/foo", "foo", "ni") + add("c", "http://example.com/bar", "bar", "nini") + add("b", "http://example.com/", "first", "blah") + add("b", "http://example.com/", "second", "spam") + add("a", "http://example.com", "1", "a") + add("Some Realm", 
"http://c.example.com:3128", "3", "c") + add("Some Realm", "d.example.com", "4", "d") + add("Some Realm", "e.example.com:3128", "5", "e") + + self.assertEqual(find_user_pass("Some Realm", "example.com"), + ('joe', 'password')) + + #self.assertEqual(find_user_pass("Some Realm", "http://example.com/ni"), + # ('ni', 'ni')) + + self.assertEqual(find_user_pass("Some Realm", "http://example.com"), + ('joe', 'password')) + self.assertEqual(find_user_pass("Some Realm", "http://example.com/"), + ('joe', 'password')) + self.assertEqual( + find_user_pass("Some Realm", "http://example.com/spam"), + ('joe', 'password')) + self.assertEqual( + find_user_pass("Some Realm", "http://example.com/spam/spam"), + ('joe', 'password')) + self.assertEqual(find_user_pass("c", "http://example.com/foo"), + ('foo', 'ni')) + self.assertEqual(find_user_pass("c", "http://example.com/bar"), + ('bar', 'nini')) + self.assertEqual(find_user_pass("b", "http://example.com/"), + ('second', 'spam')) + + # No special relationship between a.example.com and example.com: + + self.assertEqual(find_user_pass("a", "http://example.com/"), + ('1', 'a')) + self.assertEqual(find_user_pass("a", "http://a.example.com/"), + (None, None)) + + # Ports: + + self.assertEqual(find_user_pass("Some Realm", "c.example.com"), + (None, None)) + self.assertEqual(find_user_pass("Some Realm", "c.example.com:3128"), + ('3', 'c')) + self.assertEqual( + find_user_pass("Some Realm", "http://c.example.com:3128"), + ('3', 'c')) + self.assertEqual(find_user_pass("Some Realm", "d.example.com"), + ('4', 'd')) + self.assertEqual(find_user_pass("Some Realm", "e.example.com:3128"), + ('5', 'e')) + + def test_password_manager_default_port(self): + """ + The point to note here is that we can't guess the default port if + there's no scheme. This applies to both add_password and + find_user_password. 
+ """ + mgr = urllib_request.HTTPPasswordMgr() + add = mgr.add_password + find_user_pass = mgr.find_user_password + add("f", "http://g.example.com:80", "10", "j") + add("g", "http://h.example.com", "11", "k") + add("h", "i.example.com:80", "12", "l") + add("i", "j.example.com", "13", "m") + self.assertEqual(find_user_pass("f", "g.example.com:100"), + (None, None)) + self.assertEqual(find_user_pass("f", "g.example.com:80"), + ('10', 'j')) + self.assertEqual(find_user_pass("f", "g.example.com"), + (None, None)) + self.assertEqual(find_user_pass("f", "http://g.example.com:100"), + (None, None)) + self.assertEqual(find_user_pass("f", "http://g.example.com:80"), + ('10', 'j')) + self.assertEqual(find_user_pass("f", "http://g.example.com"), + ('10', 'j')) + self.assertEqual(find_user_pass("g", "h.example.com"), ('11', 'k')) + self.assertEqual(find_user_pass("g", "h.example.com:80"), ('11', 'k')) + self.assertEqual(find_user_pass("g", "http://h.example.com:80"), + ('11', 'k')) + self.assertEqual(find_user_pass("h", "i.example.com"), (None, None)) + self.assertEqual(find_user_pass("h", "i.example.com:80"), ('12', 'l')) + self.assertEqual(find_user_pass("h", "http://i.example.com:80"), + ('12', 'l')) + self.assertEqual(find_user_pass("i", "j.example.com"), ('13', 'm')) + self.assertEqual(find_user_pass("i", "j.example.com:80"), + (None, None)) + self.assertEqual(find_user_pass("i", "http://j.example.com"), + ('13', 'm')) + self.assertEqual(find_user_pass("i", "http://j.example.com:80"), + (None, None)) + + +class MockOpener(object): + addheaders = [] + def open(self, req, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): + self.req, self.data, self.timeout = req, data, timeout + def error(self, proto, *args): + self.proto, self.args = proto, args + +class MockFile(object): + def read(self, count=None): pass + def readline(self, count=None): pass + def close(self): pass + +class MockHeaders(dict): + def getheaders(self, name): + return list(self.values()) + +class 
MockResponse(io.StringIO): + def __init__(self, code, msg, headers, data, url=None): + io.StringIO.__init__(self, data) + self.code, self.msg, self.headers, self.url = code, msg, headers, url + def info(self): + return self.headers + def geturl(self): + return self.url + +class MockCookieJar(object): + def add_cookie_header(self, request): + self.ach_req = request + def extract_cookies(self, response, request): + self.ec_req, self.ec_r = request, response + +class FakeMethod(object): + def __init__(self, meth_name, action, handle): + self.meth_name = meth_name + self.handle = handle + self.action = action + def __call__(self, *args): + return self.handle(self.meth_name, self.action, *args) + +class MockHTTPResponse(io.IOBase): + def __init__(self, fp, msg, status, reason): + self.fp = fp + self.msg = msg + self.status = status + self.reason = reason + self.code = 200 + + def read(self): + return '' + + def info(self): + return {} + + def geturl(self): + return self.url + + +class MockHTTPClass(object): + def __init__(self): + self.level = 0 + self.req_headers = [] + self.data = None + self.raise_on_endheaders = False + self.sock = None + self._tunnel_headers = {} + + def __call__(self, host, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): + self.host = host + self.timeout = timeout + return self + + def set_debuglevel(self, level): + self.level = level + + def set_tunnel(self, host, port=None, headers=None): + self._tunnel_host = host + self._tunnel_port = port + if headers: + self._tunnel_headers = headers + else: + self._tunnel_headers.clear() + + def request(self, method, url, body=None, headers=None): + self.method = method + self.selector = url + if headers is not None: + self.req_headers += headers.items() + self.req_headers.sort() + if body: + self.data = body + if self.raise_on_endheaders: + import socket + raise socket.error() + def getresponse(self): + return MockHTTPResponse(MockFile(), {}, 200, "OK") + + def close(self): + pass + +class MockHandler(object): + 
# useful for testing handler machinery + # see add_ordered_mock_handlers() docstring + handler_order = 500 + def __init__(self, methods): + self._define_methods(methods) + def _define_methods(self, methods): + for spec in methods: + if len(spec) == 2: name, action = spec + else: name, action = spec, None + meth = FakeMethod(name, action, self.handle) + setattr(self.__class__, name, meth) + def handle(self, fn_name, action, *args, **kwds): + self.parent.calls.append((self, fn_name, args, kwds)) + if action is None: + return None + elif action == "return self": + return self + elif action == "return response": + res = MockResponse(200, "OK", {}, "") + return res + elif action == "return request": + return Request("http://blah/") + elif action.startswith("error"): + code = action[action.rfind(" ")+1:] + try: + code = int(code) + except ValueError: + pass + res = MockResponse(200, "OK", {}, "") + return self.parent.error("http", args[0], res, code, "", {}) + elif action == "raise": + raise urllib_error.URLError("blah") + assert False + def close(self): pass + def add_parent(self, parent): + self.parent = parent + self.parent.calls = [] + def __lt__(self, other): + if not hasattr(other, "handler_order"): + # No handler_order, leave in original order. Yuck. + return True + return self.handler_order < other.handler_order + +def add_ordered_mock_handlers(opener, meth_spec): + """Create MockHandlers and add them to an OpenerDirector. + + meth_spec: list of lists of tuples and strings defining methods to define + on handlers. eg: + + [["http_error", "ftp_open"], ["http_open"]] + + defines methods .http_error() and .ftp_open() on one handler, and + .http_open() on another. These methods just record their arguments and + return None. 
Using a tuple instead of a string causes the method to + perform some action (see MockHandler.handle()), eg: + + [["http_error"], [("http_open", "return request")]] + + defines .http_error() on one handler (which simply returns None), and + .http_open() on another handler, which returns a Request object. + + """ + handlers = [] + count = 0 + for meths in meth_spec: + class MockHandlerSubclass(MockHandler): pass + h = MockHandlerSubclass(meths) + h.handler_order += count + h.add_parent(opener) + count = count + 1 + handlers.append(h) + opener.add_handler(h) + return handlers + +def build_test_opener(*handler_instances): + opener = OpenerDirector() + for h in handler_instances: + opener.add_handler(h) + return opener + +class MockHTTPHandler(urllib_request.BaseHandler): + # useful for testing redirections and auth + # sends supplied headers and code as first response + # sends 200 OK as second response + def __init__(self, code, headers): + self.code = code + self.headers = headers + self.reset() + def reset(self): + self._count = 0 + self.requests = [] + def http_open(self, req): + import future.backports.email as email + import copy + self.requests.append(copy.deepcopy(req)) + if self._count == 0: + self._count = self._count + 1 + name = http.client.responses[self.code] + msg = email.message_from_string(self.headers) + return self.parent.error( + "http", req, MockFile(), self.code, name, msg) + else: + self.req = req + msg = email.message_from_string("\r\n\r\n") + return MockResponse(200, "OK", msg, "", req.get_full_url()) + +class MockHTTPSHandler(urllib_request.AbstractHTTPHandler): + # Useful for testing the Proxy-Authorization request by verifying the + # properties of httpcon + + def __init__(self): + urllib_request.AbstractHTTPHandler.__init__(self) + self.httpconn = MockHTTPClass() + + def https_open(self, req): + return self.do_open(self.httpconn, req) + +class MockPasswordManager(object): + def add_password(self, realm, uri, user, password): + self.realm = 
realm + self.url = uri + self.user = user + self.password = password + def find_user_password(self, realm, authuri): + self.target_realm = realm + self.target_url = authuri + return self.user, self.password + + +class OpenerDirectorTests(unittest.TestCase): + + def test_add_non_handler(self): + class NonHandler(object): + pass + self.assertRaises(TypeError, + OpenerDirector().add_handler, NonHandler()) + + def test_badly_named_methods(self): + # test work-around for three methods that accidentally follow the + # naming conventions for handler methods + # (*_open() / *_request() / *_response()) + + # These used to call the accidentally-named methods, causing a + # TypeError in real code; here, returning self from these mock + # methods would either cause no exception, or AttributeError. + + from future.backports.urllib.error import URLError + + o = OpenerDirector() + meth_spec = [ + [("do_open", "return self"), ("proxy_open", "return self")], + [("redirect_request", "return self")], + ] + add_ordered_mock_handlers(o, meth_spec) + o.add_handler(urllib_request.UnknownHandler()) + for scheme in "do", "proxy", "redirect": + self.assertRaises(URLError, o.open, scheme+"://example.com/") + + def test_handled(self): + # handler returning non-None means no more handlers will be called + o = OpenerDirector() + meth_spec = [ + ["http_open", "ftp_open", "http_error_302"], + ["ftp_open"], + [("http_open", "return self")], + [("http_open", "return self")], + ] + handlers = add_ordered_mock_handlers(o, meth_spec) + + req = Request("http://example.com/") + r = o.open(req) + # Second .http_open() gets called, third doesn't, since second returned + # non-None. Handlers without .http_open() never get any methods called + # on them. + # In fact, second mock handler defining .http_open() returns self + # (instead of response), which becomes the OpenerDirector's return + # value. 
+ self.assertEqual(r, handlers[2]) + calls = [(handlers[0], "http_open"), (handlers[2], "http_open")] + for expected, got in zip(calls, o.calls): + handler, name, args, kwds = got + self.assertEqual((handler, name), expected) + self.assertEqual(args, (req,)) + + def test_handler_order(self): + o = OpenerDirector() + handlers = [] + for meths, handler_order in [ + ([("http_open", "return self")], 500), + (["http_open"], 0), + ]: + class MockHandlerSubclass(MockHandler): pass + h = MockHandlerSubclass(meths) + h.handler_order = handler_order + handlers.append(h) + o.add_handler(h) + + o.open("http://example.com/") + # handlers called in reverse order, thanks to their sort order + self.assertEqual(o.calls[0][0], handlers[1]) + self.assertEqual(o.calls[1][0], handlers[0]) + + def test_raise(self): + # raising URLError stops processing of request + o = OpenerDirector() + meth_spec = [ + [("http_open", "raise")], + [("http_open", "return self")], + ] + handlers = add_ordered_mock_handlers(o, meth_spec) + + req = Request("http://example.com/") + self.assertRaises(urllib_error.URLError, o.open, req) + self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})]) + + def test_http_error(self): + # XXX http_error_default + # http errors are a special case + o = OpenerDirector() + meth_spec = [ + [("http_open", "error 302")], + [("http_error_400", "raise"), "http_open"], + [("http_error_302", "return response"), "http_error_303", + "http_error"], + [("http_error_302")], + ] + handlers = add_ordered_mock_handlers(o, meth_spec) + + class Unknown(object): + def __eq__(self, other): return True + + req = Request("http://example.com/") + o.open(req) + assert len(o.calls) == 2 + calls = [(handlers[0], "http_open", (req,)), + (handlers[2], "http_error_302", + (req, Unknown(), 302, "", {}))] + for expected, got in zip(calls, o.calls): + handler, method_name, args = expected + self.assertEqual((handler, method_name), got[:2]) + self.assertEqual(args, got[2]) + + + def 
test_processors(self): + # *_request / *_response methods get called appropriately + o = OpenerDirector() + meth_spec = [ + [("http_request", "return request"), + ("http_response", "return response")], + [("http_request", "return request"), + ("http_response", "return response")], + ] + handlers = add_ordered_mock_handlers(o, meth_spec) + + req = Request("http://example.com/") + o.open(req) + # processor methods are called on *all* handlers that define them, + # not just the first handler that handles the request + calls = [ + (handlers[0], "http_request"), (handlers[1], "http_request"), + (handlers[0], "http_response"), (handlers[1], "http_response")] + + for i, (handler, name, args, kwds) in enumerate(o.calls): + if i < 2: + # *_request + self.assertEqual((handler, name), calls[i]) + self.assertEqual(len(args), 1) + self.assertIsInstance(args[0], Request) + else: + # *_response + self.assertEqual((handler, name), calls[i]) + self.assertEqual(len(args), 2) + self.assertIsInstance(args[0], Request) + # response from opener.open is None, because there's no + # handler that defines http_open to handle it + self.assertTrue(args[1] is None or + isinstance(args[1], MockResponse)) + + def test_method_deprecations(self): + req = Request("http://www.example.com") + + with self.assertWarns(DeprecationWarning): + req.add_data("data") + with self.assertWarns(DeprecationWarning): + req.get_data() + with self.assertWarns(DeprecationWarning): + req.has_data() + with self.assertWarns(DeprecationWarning): + req.get_host() + with self.assertWarns(DeprecationWarning): + req.get_selector() + with self.assertWarns(DeprecationWarning): + req.is_unverifiable() + with self.assertWarns(DeprecationWarning): + req.get_origin_req_host() + with self.assertWarns(DeprecationWarning): + req.get_type() + + +def sanepathname2url(path): + try: + path.encode("utf-8") + except UnicodeEncodeError: + raise unittest.SkipTest("path is not encodable to utf8") + urlpath = urllib_request.pathname2url(path) 
+ if os.name == "nt" and urlpath.startswith("///"): + urlpath = urlpath[2:] + # XXX don't ask me about the mac... + return urlpath + +class HandlerTests(unittest.TestCase): + + def test_ftp(self): + class MockFTPWrapper(object): + def __init__(self, data): self.data = data + def retrfile(self, filename, filetype): + self.filename, self.filetype = filename, filetype + return io.StringIO(self.data), len(self.data) + def close(self): pass + + class NullFTPHandler(urllib_request.FTPHandler): + def __init__(self, data): self.data = data + def connect_ftp(self, user, passwd, host, port, dirs, + timeout=socket._GLOBAL_DEFAULT_TIMEOUT): + self.user, self.passwd = user, passwd + self.host, self.port = host, port + self.dirs = dirs + self.ftpwrapper = MockFTPWrapper(self.data) + return self.ftpwrapper + + import ftplib + data = "rheum rhaponicum" + h = NullFTPHandler(data) + h.parent = MockOpener() + + for url, host, port, user, passwd, type_, dirs, filename, mimetype in [ + ("ftp://localhost/foo/bar/baz.html", + "localhost", ftplib.FTP_PORT, "", "", "I", + ["foo", "bar"], "baz.html", "text/html"), + ("ftp://parrot@localhost/foo/bar/baz.html", + "localhost", ftplib.FTP_PORT, "parrot", "", "I", + ["foo", "bar"], "baz.html", "text/html"), + ("ftp://%25parrot@localhost/foo/bar/baz.html", + "localhost", ftplib.FTP_PORT, "%parrot", "", "I", + ["foo", "bar"], "baz.html", "text/html"), + ("ftp://%2542parrot@localhost/foo/bar/baz.html", + "localhost", ftplib.FTP_PORT, "%42parrot", "", "I", + ["foo", "bar"], "baz.html", "text/html"), + ("ftp://localhost:80/foo/bar/", + "localhost", 80, "", "", "D", + ["foo", "bar"], "", None), + ("ftp://localhost/baz.gif;type=a", + "localhost", ftplib.FTP_PORT, "", "", "A", + [], "baz.gif", None), + ("ftp://localhost/baz.gif", + "localhost", ftplib.FTP_PORT, "", "", "I", + [], "baz.gif", "image/gif"), + ]: + req = Request(url) + req.timeout = None + r = h.ftp_open(req) + # ftp authentication not yet implemented by FTPHandler + 
self.assertEqual(h.user, user) + self.assertEqual(h.passwd, passwd) + self.assertEqual(h.host, socket.gethostbyname(host)) + self.assertEqual(h.port, port) + self.assertEqual(h.dirs, dirs) + self.assertEqual(h.ftpwrapper.filename, filename) + self.assertEqual(h.ftpwrapper.filetype, type_) + headers = r.info() + self.assertEqual(headers.get("Content-type"), mimetype) + self.assertEqual(int(headers["Content-length"]), len(data)) + + def test_file(self): + import future.backports.email.utils as email_utils + import socket + h = urllib_request.FileHandler() + o = h.parent = MockOpener() + + TESTFN = support.TESTFN + urlpath = sanepathname2url(os.path.abspath(TESTFN)) + towrite = b"hello, world\n" + urls = [ + "file://localhost%s" % urlpath, + "file://%s" % urlpath, + "file://%s%s" % (socket.gethostbyname('localhost'), urlpath), + ] + try: + localaddr = socket.gethostbyname(socket.gethostname()) + except socket.gaierror: + localaddr = '' + if localaddr: + urls.append("file://%s%s" % (localaddr, urlpath)) + + for url in urls: + f = open(TESTFN, "wb") + try: + try: + f.write(towrite) + finally: + f.close() + + r = h.file_open(Request(url)) + try: + data = r.read() + headers = r.info() + respurl = r.geturl() + finally: + r.close() + stats = os.stat(TESTFN) + modified = email_utils.formatdate(stats.st_mtime, usegmt=True) + finally: + os.remove(TESTFN) + self.assertEqual(data, towrite) + self.assertEqual(headers["Content-type"], "text/plain") + self.assertEqual(headers["Content-length"], "13") + self.assertEqual(headers["Last-modified"], modified) + self.assertEqual(respurl, url) + + for url in [ + "file://localhost:80%s" % urlpath, + "file:///file_does_not_exist.txt", + "file://%s:80%s/%s" % (socket.gethostbyname('localhost'), + os.getcwd(), TESTFN), + "file://somerandomhost.ontheinternet.com%s/%s" % + (os.getcwd(), TESTFN), + ]: + try: + f = open(TESTFN, "wb") + try: + f.write(towrite) + finally: + f.close() + + self.assertRaises(urllib_error.URLError, + h.file_open, 
Request(url)) + finally: + os.remove(TESTFN) + + h = urllib_request.FileHandler() + o = h.parent = MockOpener() + # XXXX why does // mean ftp (and /// mean not ftp!), and where + # is file: scheme specified? I think this is really a bug, and + # what was intended was to distinguish between URLs like: + # file:/blah.txt (a file) + # file://localhost/blah.txt (a file) + # file:///blah.txt (a file) + # file://ftp.example.com/blah.txt (an ftp URL) + for url, ftp in [ + ("file://ftp.example.com//foo.txt", False), + ("file://ftp.example.com///foo.txt", False), +# XXXX bug: fails with OSError, should be URLError + ("file://ftp.example.com/foo.txt", False), + ("file://somehost//foo/something.txt", False), + ("file://localhost//foo/something.txt", False), + ]: + req = Request(url) + try: + h.file_open(req) + # XXXX remove OSError when bug fixed + except (urllib_error.URLError, OSError): + self.assertFalse(ftp) + else: + self.assertIs(o.req, req) + self.assertEqual(req.type, "ftp") + self.assertEqual(req.type == "ftp", ftp) + + @skip26 + def test_http(self): + + h = urllib_request.AbstractHTTPHandler() + o = h.parent = MockOpener() + + url = "http://example.com/" + for method, data in [("GET", None), ("POST", b"blah")]: + req = Request(url, data, {"Foo": "bar"}) + req.timeout = None + req.add_unredirected_header("Spam", "eggs") + http = MockHTTPClass() + r = h.do_open(http, req) + + # result attributes + r.read; r.readline # wrapped MockFile methods + r.info; r.geturl # addinfourl methods + r.code, r.msg == 200, "OK" # added from MockHTTPClass.getreply() + hdrs = r.info() + hdrs.get; hdrs.__contains__ # r.info() gives dict from .getreply() + self.assertEqual(r.geturl(), url) + + self.assertEqual(http.host, "example.com") + self.assertEqual(http.level, 0) + self.assertEqual(http.method, method) + self.assertEqual(http.selector, "/") + self.assertEqual(http.req_headers, + [("Connection", "close"), + ("Foo", "bar"), ("Spam", "eggs")]) + self.assertEqual(http.data, data) + + # 
check socket.error converted to URLError + http.raise_on_endheaders = True + self.assertRaises(urllib_error.URLError, h.do_open, http, req) + + # Check for TypeError on POST data which is str. + req = Request("http://example.com/","badpost") + self.assertRaises(TypeError, h.do_request_, req) + + # check adding of standard headers + o.addheaders = [("Spam", "eggs")] + for data in b"", None: # POST, GET + req = Request("http://example.com/", data) + r = MockResponse(200, "OK", {}, "") + newreq = h.do_request_(req) + if data is None: # GET + self.assertNotIn("Content-length", req.unredirected_hdrs) + self.assertNotIn("Content-type", req.unredirected_hdrs) + else: # POST + self.assertEqual(req.unredirected_hdrs["Content-length"], "0") + self.assertEqual(req.unredirected_hdrs["Content-type"], + "application/x-www-form-urlencoded") + # XXX the details of Host could be better tested + self.assertEqual(req.unredirected_hdrs["Host"], "example.com") + self.assertEqual(req.unredirected_hdrs["Spam"], "eggs") + + # don't clobber existing headers + req.add_unredirected_header("Content-length", "foo") + req.add_unredirected_header("Content-type", "bar") + req.add_unredirected_header("Host", "baz") + req.add_unredirected_header("Spam", "foo") + newreq = h.do_request_(req) + self.assertEqual(req.unredirected_hdrs["Content-length"], "foo") + self.assertEqual(req.unredirected_hdrs["Content-type"], "bar") + self.assertEqual(req.unredirected_hdrs["Host"], "baz") + self.assertEqual(req.unredirected_hdrs["Spam"], "foo") + + # Check iterable body support + def iterable_body(): + yield b"one" + yield b"two" + yield b"three" + + for headers in {}, {"Content-Length": 11}: + req = Request("http://example.com/", iterable_body(), headers) + if not headers: + # Having an iterable body without a Content-Length should + # raise an exception + self.assertRaises(ValueError, h.do_request_, req) + else: + newreq = h.do_request_(req) + + # A file object. 
+ # Test only Content-Length attribute of request. + + file_obj = io.BytesIO() + file_obj.write(b"Something\nSomething\nSomething\n") + + for headers in {}, {"Content-Length": 30}: + req = Request("http://example.com/", file_obj, headers) + if not headers: + # Having an iterable body without a Content-Length should + # raise an exception + self.assertRaises(ValueError, h.do_request_, req) + else: + newreq = h.do_request_(req) + self.assertEqual(int(newreq.get_header('Content-length')),30) + + file_obj.close() + + # array.array Iterable - Content Length is calculated + + iterable_array = array.array(text_to_native_str("I"), + [1,2,3,4]) + + for headers in {}, {"Content-Length": 16}: + req = Request("http://example.com/", iterable_array, headers) + newreq = h.do_request_(req) + self.assertEqual(int(newreq.get_header('Content-length')),16) + + @skip26 + def test_http_doubleslash(self): + # Checks the presence of any unnecessary double slash in url does not + # break anything. Previously, a double slash directly after the host + # could cause incorrect parsing. 
+ h = urllib_request.AbstractHTTPHandler() + h.parent = MockOpener() + + data = b"" + ds_urls = [ + "http://example.com/foo/bar/baz.html", + "http://example.com//foo/bar/baz.html", + "http://example.com/foo//bar/baz.html", + "http://example.com/foo/bar//baz.html" + ] + + for ds_url in ds_urls: + ds_req = Request(ds_url, data) + + # Check whether host is determined correctly if there is no proxy + np_ds_req = h.do_request_(ds_req) + self.assertEqual(np_ds_req.unredirected_hdrs["Host"],"example.com") + + # Check whether host is determined correctly if there is a proxy + ds_req.set_proxy("someproxy:3128",None) + p_ds_req = h.do_request_(ds_req) + self.assertEqual(p_ds_req.unredirected_hdrs["Host"],"example.com") + + def test_fixpath_in_weirdurls(self): + # Issue4493: urllib2 to supply '/' when to urls where path does not + # start with'/' + + h = urllib_request.AbstractHTTPHandler() + h.parent = MockOpener() + + weird_url = 'http://www.python.org?getspam' + req = Request(weird_url) + newreq = h.do_request_(req) + self.assertEqual(newreq.host,'www.python.org') + self.assertEqual(newreq.selector,'/?getspam') + + url_without_path = 'http://www.python.org' + req = Request(url_without_path) + newreq = h.do_request_(req) + self.assertEqual(newreq.host,'www.python.org') + self.assertEqual(newreq.selector,'') + + + def test_errors(self): + h = urllib_request.HTTPErrorProcessor() + o = h.parent = MockOpener() + + url = "http://example.com/" + req = Request(url) + # all 2xx are passed through + r = MockResponse(200, "OK", {}, "", url) + newr = h.http_response(req, r) + self.assertIs(r, newr) + self.assertFalse(hasattr(o, "proto")) # o.error not called + r = MockResponse(202, "Accepted", {}, "", url) + newr = h.http_response(req, r) + self.assertIs(r, newr) + self.assertFalse(hasattr(o, "proto")) # o.error not called + r = MockResponse(206, "Partial content", {}, "", url) + newr = h.http_response(req, r) + self.assertIs(r, newr) + self.assertFalse(hasattr(o, "proto")) # o.error 
not called + # anything else calls o.error (and MockOpener returns None, here) + r = MockResponse(502, "Bad gateway", {}, "", url) + self.assertIsNone(h.http_response(req, r)) + self.assertEqual(o.proto, "http") # o.error called + self.assertEqual(o.args, (req, r, 502, "Bad gateway", {})) + + def test_cookies(self): + cj = MockCookieJar() + h = urllib_request.HTTPCookieProcessor(cj) + h.parent = MockOpener() + + req = Request("http://example.com/") + r = MockResponse(200, "OK", {}, "") + newreq = h.http_request(req) + self.assertIs(cj.ach_req, req) + self.assertIs(cj.ach_req, newreq) + self.assertEqual(req.origin_req_host, "example.com") + self.assertFalse(req.unverifiable) + newr = h.http_response(req, r) + self.assertIs(cj.ec_req, req) + self.assertIs(cj.ec_r, r) + self.assertIs(r, newr) + + def test_redirect(self): + from_url = "http://example.com/a.html" + to_url = "http://example.com/b.html" + h = urllib_request.HTTPRedirectHandler() + o = h.parent = MockOpener() + + # ordinary redirect behaviour + for code in 301, 302, 303, 307: + for data in None, "blah\nblah\n": + method = getattr(h, "http_error_%s" % code) + req = Request(from_url, data) + req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT + req.add_header("Nonsense", "viking=withhold") + if data is not None: + req.add_header("Content-Length", str(len(data))) + req.add_unredirected_header("Spam", "spam") + try: + method(req, MockFile(), code, "Blah", + MockHeaders({"location": to_url})) + except urllib_error.HTTPError: + # 307 in response to POST requires user OK + self.assertTrue(code == 307 and data is not None) + self.assertEqual(o.req.get_full_url(), to_url) + try: + self.assertEqual(o.req.get_method(), "GET") + except AttributeError: + self.assertFalse(o.req.data) + + # now it's a GET, there should not be headers regarding content + # (possibly dragged from before being a POST) + headers = [x.lower() for x in o.req.headers] + self.assertNotIn("content-length", headers) + self.assertNotIn("content-type", 
headers) + + self.assertEqual(o.req.headers["Nonsense"], + "viking=withhold") + self.assertNotIn("Spam", o.req.headers) + self.assertNotIn("Spam", o.req.unredirected_hdrs) + + # loop detection + req = Request(from_url) + req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT + def redirect(h, req, url=to_url): + h.http_error_302(req, MockFile(), 302, "Blah", + MockHeaders({"location": url})) + # Note that the *original* request shares the same record of + # redirections with the sub-requests caused by the redirections. + + # detect infinite loop redirect of a URL to itself + req = Request(from_url, origin_req_host="example.com") + count = 0 + req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT + try: + while 1: + redirect(h, req, "http://example.com/") + count = count + 1 + except urllib_error.HTTPError: + # don't stop until max_repeats, because cookies may introduce state + self.assertEqual(count, urllib_request.HTTPRedirectHandler.max_repeats) + + # detect endless non-repeating chain of redirects + req = Request(from_url, origin_req_host="example.com") + count = 0 + req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT + try: + while 1: + redirect(h, req, "http://example.com/%d" % count) + count = count + 1 + except urllib_error.HTTPError: + self.assertEqual(count, + urllib_request.HTTPRedirectHandler.max_redirections) + + + def test_invalid_redirect(self): + from_url = "http://example.com/a.html" + valid_schemes = ['http','https','ftp'] + invalid_schemes = ['file','imap','ldap'] + schemeless_url = "example.com/b.html" + h = urllib_request.HTTPRedirectHandler() + o = h.parent = MockOpener() + req = Request(from_url) + req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT + + for scheme in invalid_schemes: + invalid_url = scheme + '://' + schemeless_url + self.assertRaises(urllib_error.HTTPError, h.http_error_302, + req, MockFile(), 302, "Security Loophole", + MockHeaders({"location": invalid_url})) + + for scheme in valid_schemes: + valid_url = scheme + '://' + schemeless_url + 
h.http_error_302(req, MockFile(), 302, "That's fine", + MockHeaders({"location": valid_url})) + self.assertEqual(o.req.get_full_url(), valid_url) + + def test_relative_redirect(self): + from future.backports.urllib import parse as urllib_parse + from_url = "http://example.com/a.html" + relative_url = "/b.html" + h = urllib_request.HTTPRedirectHandler() + o = h.parent = MockOpener() + req = Request(from_url) + req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT + + valid_url = urllib_parse.urljoin(from_url,relative_url) + h.http_error_302(req, MockFile(), 302, "That's fine", + MockHeaders({"location": valid_url})) + self.assertEqual(o.req.get_full_url(), valid_url) + + def test_cookie_redirect(self): + # cookies shouldn't leak into redirected requests + from future.backports.http.cookiejar import CookieJar + + cj = CookieJar() + interact_netscape(cj, "http://www.example.com/", "spam=eggs") + hh = MockHTTPHandler(302, "Location: http://www.cracker.com/\r\n\r\n") + hdeh = urllib_request.HTTPDefaultErrorHandler() + hrh = urllib_request.HTTPRedirectHandler() + cp = urllib_request.HTTPCookieProcessor(cj) + o = build_test_opener(hh, hdeh, hrh, cp) + o.open("http://www.example.com/") + self.assertFalse(hh.req.has_header("Cookie")) + + def test_redirect_fragment(self): + redirected_url = 'http://www.example.com/index.html#OK\r\n\r\n' + hh = MockHTTPHandler(302, 'Location: ' + redirected_url) + hdeh = urllib_request.HTTPDefaultErrorHandler() + hrh = urllib_request.HTTPRedirectHandler() + o = build_test_opener(hh, hdeh, hrh) + fp = o.open('http://www.example.com') + self.assertEqual(fp.geturl(), redirected_url.strip()) + + def test_proxy(self): + o = OpenerDirector() + ph = urllib_request.ProxyHandler(dict(http="proxy.example.com:3128")) + o.add_handler(ph) + meth_spec = [ + [("http_open", "return response")] + ] + handlers = add_ordered_mock_handlers(o, meth_spec) + + req = Request("http://acme.example.com/") + self.assertEqual(req.host, "acme.example.com") + o.open(req) + 
self.assertEqual(req.host, "proxy.example.com:3128") + + self.assertEqual([(handlers[0], "http_open")], + [tup[0:2] for tup in o.calls]) + + def test_proxy_no_proxy(self): + os.environ['no_proxy'] = 'python.org' + o = OpenerDirector() + ph = urllib_request.ProxyHandler(dict(http="proxy.example.com")) + o.add_handler(ph) + req = Request("http://www.perl.org/") + self.assertEqual(req.host, "www.perl.org") + o.open(req) + self.assertEqual(req.host, "proxy.example.com") + req = Request("http://www.python.org") + self.assertEqual(req.host, "www.python.org") + o.open(req) + self.assertEqual(req.host, "www.python.org") + del os.environ['no_proxy'] + + def test_proxy_no_proxy_all(self): + os.environ['no_proxy'] = '*' + o = OpenerDirector() + ph = urllib_request.ProxyHandler(dict(http="proxy.example.com")) + o.add_handler(ph) + req = Request("http://www.python.org") + self.assertEqual(req.host, "www.python.org") + o.open(req) + self.assertEqual(req.host, "www.python.org") + del os.environ['no_proxy'] + + + def test_proxy_https(self): + o = OpenerDirector() + ph = urllib_request.ProxyHandler(dict(https="proxy.example.com:3128")) + o.add_handler(ph) + meth_spec = [ + [("https_open", "return response")] + ] + handlers = add_ordered_mock_handlers(o, meth_spec) + + req = Request("https://www.example.com/") + self.assertEqual(req.host, "www.example.com") + o.open(req) + self.assertEqual(req.host, "proxy.example.com:3128") + self.assertEqual([(handlers[0], "https_open")], + [tup[0:2] for tup in o.calls]) + + def test_proxy_https_proxy_authorization(self): + o = OpenerDirector() + ph = urllib_request.ProxyHandler(dict(https='proxy.example.com:3128')) + o.add_handler(ph) + https_handler = MockHTTPSHandler() + o.add_handler(https_handler) + req = Request("https://www.example.com/") + req.add_header("Proxy-Authorization","FooBar") + req.add_header("User-Agent","Grail") + self.assertEqual(req.host, "www.example.com") + self.assertIsNone(req._tunnel_host) + o.open(req) + # Verify 
Proxy-Authorization gets tunneled to request. + # httpsconn req_headers do not have the Proxy-Authorization header but + # the req will have. + self.assertNotIn(("Proxy-Authorization","FooBar"), + https_handler.httpconn.req_headers) + self.assertIn(("User-Agent","Grail"), + https_handler.httpconn.req_headers) + self.assertIsNotNone(req._tunnel_host) + self.assertEqual(req.host, "proxy.example.com:3128") + self.assertEqual(req.get_header("Proxy-authorization"),"FooBar") + + # TODO: This should be only for OSX + @unittest.skipUnless(sys.platform == 'darwin', "only relevant for OSX") + def test_osx_proxy_bypass(self): + bypass = { + 'exclude_simple': False, + 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.10', + '10.0/16'] + } + # Check hosts that should trigger the proxy bypass + for host in ('foo.bar', 'www.bar.com', '127.0.0.1', '10.10.0.1', + '10.0.0.1'): + self.assertTrue(_proxy_bypass_macosx_sysconf(host, bypass), + 'expected bypass of %s to be True' % host) + # Check hosts that should not trigger the proxy bypass + for host in ('abc.foo.bar', 'bar.com', '127.0.0.2', '10.11.0.1', 'test'): + self.assertFalse(_proxy_bypass_macosx_sysconf(host, bypass), + 'expected bypass of %s to be False' % host) + + # Check the exclude_simple flag + bypass = {'exclude_simple': True, 'exceptions': []} + self.assertTrue(_proxy_bypass_macosx_sysconf('test', bypass)) + + def test_basic_auth(self, quote_char='"'): + opener = OpenerDirector() + password_manager = MockPasswordManager() + auth_handler = urllib_request.HTTPBasicAuthHandler(password_manager) + realm = "ACME Widget Store" + http_handler = MockHTTPHandler( + 401, 'WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' % + (quote_char, realm, quote_char) ) + opener.add_handler(auth_handler) + opener.add_handler(http_handler) + self._test_basic_auth(opener, auth_handler, "Authorization", + realm, http_handler, password_manager, + "http://acme.example.com/protected", + "http://acme.example.com/protected", + ) + + def 
test_basic_auth_with_single_quoted_realm(self): + self.test_basic_auth(quote_char="'") + + def test_basic_auth_with_unquoted_realm(self): + opener = OpenerDirector() + password_manager = MockPasswordManager() + auth_handler = urllib_request.HTTPBasicAuthHandler(password_manager) + realm = "ACME Widget Store" + http_handler = MockHTTPHandler( + 401, 'WWW-Authenticate: Basic realm=%s\r\n\r\n' % realm) + opener.add_handler(auth_handler) + opener.add_handler(http_handler) + with self.assertWarns(UserWarning): + self._test_basic_auth(opener, auth_handler, "Authorization", + realm, http_handler, password_manager, + "http://acme.example.com/protected", + "http://acme.example.com/protected", + ) + + def test_proxy_basic_auth(self): + opener = OpenerDirector() + ph = urllib_request.ProxyHandler(dict(http="proxy.example.com:3128")) + opener.add_handler(ph) + password_manager = MockPasswordManager() + auth_handler = urllib_request.ProxyBasicAuthHandler(password_manager) + realm = "ACME Networks" + http_handler = MockHTTPHandler( + 407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm) + opener.add_handler(auth_handler) + opener.add_handler(http_handler) + self._test_basic_auth(opener, auth_handler, "Proxy-authorization", + realm, http_handler, password_manager, + "http://acme.example.com:3128/protected", + "proxy.example.com:3128", + ) + + def test_basic_and_digest_auth_handlers(self): + # HTTPDigestAuthHandler raised an exception if it couldn't handle a 40* + # response (http://python.org/sf/1479302), where it should instead + # return None to allow another handler (especially + # HTTPBasicAuthHandler) to handle the response. 
+ + # Also (http://python.org/sf/14797027, RFC 2617 section 1.2), we must + # try digest first (since it's the strongest auth scheme), so we record + # order of calls here to check digest comes first: + class RecordingOpenerDirector(OpenerDirector): + def __init__(self): + OpenerDirector.__init__(self) + self.recorded = [] + def record(self, info): + self.recorded.append(info) + class TestDigestAuthHandler(urllib_request.HTTPDigestAuthHandler): + def http_error_401(self, *args, **kwds): + self.parent.record("digest") + urllib_request.HTTPDigestAuthHandler.http_error_401(self, + *args, **kwds) + class TestBasicAuthHandler(urllib_request.HTTPBasicAuthHandler): + def http_error_401(self, *args, **kwds): + self.parent.record("basic") + urllib_request.HTTPBasicAuthHandler.http_error_401(self, + *args, **kwds) + + opener = RecordingOpenerDirector() + password_manager = MockPasswordManager() + digest_handler = TestDigestAuthHandler(password_manager) + basic_handler = TestBasicAuthHandler(password_manager) + realm = "ACME Networks" + http_handler = MockHTTPHandler( + 401, 'WWW-Authenticate: Basic realm="%s"\r\n\r\n' % realm) + opener.add_handler(basic_handler) + opener.add_handler(digest_handler) + opener.add_handler(http_handler) + + # check basic auth isn't blocked by digest handler failing + self._test_basic_auth(opener, basic_handler, "Authorization", + realm, http_handler, password_manager, + "http://acme.example.com/protected", + "http://acme.example.com/protected", + ) + # check digest was tried before basic (twice, because + # _test_basic_auth called .open() twice) + self.assertEqual(opener.recorded, ["digest", "basic"]*2) + + def test_unsupported_auth_digest_handler(self): + opener = OpenerDirector() + # While using DigestAuthHandler + digest_auth_handler = urllib_request.HTTPDigestAuthHandler(None) + http_handler = MockHTTPHandler( + 401, 'WWW-Authenticate: Kerberos\r\n\r\n') + opener.add_handler(digest_auth_handler) + opener.add_handler(http_handler) + 
self.assertRaises(ValueError,opener.open,"http://www.example.com") + + def test_unsupported_auth_basic_handler(self): + # While using BasicAuthHandler + opener = OpenerDirector() + basic_auth_handler = urllib_request.HTTPBasicAuthHandler(None) + http_handler = MockHTTPHandler( + 401, 'WWW-Authenticate: NTLM\r\n\r\n') + opener.add_handler(basic_auth_handler) + opener.add_handler(http_handler) + self.assertRaises(ValueError,opener.open,"http://www.example.com") + + def _test_basic_auth(self, opener, auth_handler, auth_header, + realm, http_handler, password_manager, + request_url, protected_url): + import base64 + user, password = "wile", "coyote" + + # .add_password() fed through to password manager + auth_handler.add_password(realm, request_url, user, password) + self.assertEqual(realm, password_manager.realm) + self.assertEqual(request_url, password_manager.url) + self.assertEqual(user, password_manager.user) + self.assertEqual(password, password_manager.password) + + opener.open(request_url) + + # should have asked the password manager for the username/password + self.assertEqual(password_manager.target_realm, realm) + self.assertEqual(password_manager.target_url, protected_url) + + # expect one request without authorization, then one with + self.assertEqual(len(http_handler.requests), 2) + self.assertFalse(http_handler.requests[0].has_header(auth_header)) + userpass = bytes('%s:%s' % (user, password), "ascii") + auth_hdr_value = ('Basic ' + + base64.encodebytes(userpass).strip().decode()) + self.assertEqual(http_handler.requests[1].get_header(auth_header), + auth_hdr_value) + self.assertEqual(http_handler.requests[1].unredirected_hdrs[auth_header], + auth_hdr_value) + # if the password manager can't find a password, the handler won't + # handle the HTTP auth error + password_manager.user = password_manager.password = None + http_handler.reset() + opener.open(request_url) + self.assertEqual(len(http_handler.requests), 1) + 
self.assertFalse(http_handler.requests[0].has_header(auth_header)) + + +class MiscTests(unittest.TestCase): + + def opener_has_handler(self, opener, handler_class): + self.assertTrue(any(h.__class__ == handler_class + for h in opener.handlers)) + + def test_build_opener(self): + class MyHTTPHandler(urllib_request.HTTPHandler): pass + class FooHandler(urllib_request.BaseHandler): + def foo_open(self): pass + class BarHandler(urllib_request.BaseHandler): + def bar_open(self): pass + + build_opener = urllib_request.build_opener + + o = build_opener(FooHandler, BarHandler) + self.opener_has_handler(o, FooHandler) + self.opener_has_handler(o, BarHandler) + + # can take a mix of classes and instances + o = build_opener(FooHandler, BarHandler()) + self.opener_has_handler(o, FooHandler) + self.opener_has_handler(o, BarHandler) + + # subclasses of default handlers override default handlers + o = build_opener(MyHTTPHandler) + self.opener_has_handler(o, MyHTTPHandler) + + # a particular case of overriding: default handlers can be passed + # in explicitly + o = build_opener() + self.opener_has_handler(o, urllib_request.HTTPHandler) + o = build_opener(urllib_request.HTTPHandler) + self.opener_has_handler(o, urllib_request.HTTPHandler) + o = build_opener(urllib_request.HTTPHandler()) + self.opener_has_handler(o, urllib_request.HTTPHandler) + + # Issue2670: multiple handlers sharing the same base class + class MyOtherHTTPHandler(urllib_request.HTTPHandler): pass + o = build_opener(MyHTTPHandler, MyOtherHTTPHandler) + self.opener_has_handler(o, MyHTTPHandler) + self.opener_has_handler(o, MyOtherHTTPHandler) + + def test_HTTPError_interface(self): + """ + Issue 13211 reveals that HTTPError didn't implement the URLError + interface even though HTTPError is a subclass of URLError. 
+ """ + msg = 'something bad happened' + url = code = fp = None + hdrs = 'Content-Length: 42' + err = urllib_error.HTTPError(url, code, msg, hdrs, fp) + self.assertTrue(hasattr(err, 'reason')) + self.assertEqual(err.reason, 'something bad happened') + self.assertTrue(hasattr(err, 'hdrs')) + self.assertEqual(err.hdrs, 'Content-Length: 42') + expected_errmsg = 'HTTP Error %s: %s' % (err.code, err.msg) + self.assertEqual(str(err), expected_errmsg) + + +class RequestTests(unittest.TestCase): + + def setUp(self): + self.get = Request("http://www.python.org/~jeremy/") + self.post = Request("http://www.python.org/~jeremy/", + "data", + headers={"X-Test": "test"}) + + def test_method(self): + self.assertEqual("POST", self.post.get_method()) + self.assertEqual("GET", self.get.get_method()) + + def test_data(self): + self.assertFalse(self.get.data) + self.assertEqual("GET", self.get.get_method()) + self.get.data = "spam" + self.assertTrue(self.get.data) + self.assertEqual("POST", self.get.get_method()) + + def test_get_full_url(self): + self.assertEqual("http://www.python.org/~jeremy/", + self.get.get_full_url()) + + def test_selector(self): + self.assertEqual("/~jeremy/", self.get.selector) + req = Request("http://www.python.org/") + self.assertEqual("/", req.selector) + + def test_get_type(self): + self.assertEqual("http", self.get.type) + + def test_get_host(self): + self.assertEqual("www.python.org", self.get.host) + + def test_get_host_unquote(self): + req = Request("http://www.%70ython.org/") + self.assertEqual("www.python.org", req.host) + + def test_proxy(self): + self.assertFalse(self.get.has_proxy()) + self.get.set_proxy("www.perl.org", "http") + self.assertTrue(self.get.has_proxy()) + self.assertEqual("www.python.org", self.get.origin_req_host) + self.assertEqual("www.perl.org", self.get.host) + + def test_wrapped_url(self): + req = Request("") + self.assertEqual("www.python.org", req.host) + + def test_url_fragment(self): + req = 
Request("http://www.python.org/?qs=query#fragment=true") + self.assertEqual("/?qs=query", req.selector) + req = Request("http://www.python.org/#fun=true") + self.assertEqual("/", req.selector) + + # Issue 11703: geturl() omits fragment in the original URL. + url = 'http://docs.python.org/library/urllib2.html#OK' + req = Request(url) + self.assertEqual(req.get_full_url(), url) + + def test_HTTPError_interface_call(self): + """ + Issue 15701 - HTTPError interface has info method available from URLError + """ + err = urllib_request.HTTPError(msg="something bad happened", url=None, + code=None, hdrs='Content-Length:42', fp=None) + self.assertTrue(hasattr(err, 'reason')) + assert hasattr(err, 'reason') + assert hasattr(err, 'info') + assert callable(err.info) + try: + err.info() + except AttributeError: + self.fail('err.info call failed.') + self.assertEqual(err.info(), "Content-Length:42") + +def test_main(verbose=None): + # support.run_doctest(test_urllib2, verbose) + # support.run_doctest(urllib_request, verbose) + tests = (TrivialTests, + OpenerDirectorTests, + HandlerTests, + MiscTests, + RequestTests, + RequestHdrsTests) + support.run_unittest(*tests) + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_future/test_urllib_response.py b/tests/test_future/test_urllib_response.py new file mode 100644 index 00000000..e8f4b4f1 --- /dev/null +++ b/tests/test_future/test_urllib_response.py @@ -0,0 +1,45 @@ +"""Unit tests for code in urllib.response.""" + +from __future__ import absolute_import, division, unicode_literals + +from future.backports import urllib +import future.backports.urllib.response as urllib_response +from future.backports.test import support as test_support +from future.tests.base import unittest + + +class File(object): + + def __init__(self): + self.closed = False + + def read(self, bytes): + pass + + def readline(self): + pass + + def close(self): + self.closed = True + + +class Testaddbase(unittest.TestCase): + + # TODO(jhylton): 
Write tests for other functionality of addbase() + + def setUp(self): + self.fp = File() + self.addbase = urllib_response.addbase(self.fp) + + def test_with(self): + def f(): + with self.addbase as spam: + pass + self.assertFalse(self.fp.closed) + f() + self.assertTrue(self.fp.closed) + self.assertRaises(ValueError, f) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_future/test_urllib_toplevel.py b/tests/test_future/test_urllib_toplevel.py new file mode 100644 index 00000000..49ce7fc2 --- /dev/null +++ b/tests/test_future/test_urllib_toplevel.py @@ -0,0 +1,1404 @@ +"""Regression tests for urllib""" +from __future__ import absolute_import, division, unicode_literals + +import io +import os +import sys +import tempfile +from nturl2path import url2pathname, pathname2url +from base64 import b64encode +import collections + +PY2 = sys.version_info[0] == 2 + +from future.builtins import bytes, chr, hex, open, range, str, int +from future.standard_library import install_aliases +install_aliases() +from urllib import parse as urllib_parse +from urllib import request as urllib_request +from urllib import error as urllib_error +from http import client as http_client +try: + from future.moves.test import support +except ImportError: + from future.backports.test import support +if PY2: + from future.backports.email import message as email_message +else: + from email import message as email_message +# from future.backports.email import message as email_message +from future.tests.base import unittest, skip26, expectedFailurePY26, expectedFailurePY2 + + +def hexescape(char): + """Escape char as RFC 2396 specifies""" + hex_repr = hex(ord(char))[2:].upper() + if len(hex_repr) == 1: + hex_repr = "0%s" % hex_repr + return "%" + hex_repr + +# Shortcut for testing FancyURLopener +_urlopener = None + + +def urlopen(url, data=None, proxies=None): + """urlopen(url [, data]) -> open file-like object""" + global _urlopener + if proxies is not None: + opener = 
urllib_request.FancyURLopener(proxies=proxies) + elif not _urlopener: + with support.check_warnings( + ('FancyURLopener style of invoking requests is deprecated.', + DeprecationWarning)): + opener = urllib_request.FancyURLopener() + _urlopener = opener + else: + opener = _urlopener + if data is None: + return opener.open(url) + else: + return opener.open(url, data) + + +class FakeHTTPMixin(object): + def fakehttp(self, fakedata): + class FakeSocket(io.BytesIO): + io_refs = 1 + + def sendall(self, data): + FakeHTTPConnection.buf = data + + def makefile(self, *args, **kwds): + self.io_refs += 1 + return self + + def read(self, amt=None): + if self.closed: + return b"" + return io.BytesIO.read(self, amt) + + def readline(self, length=None): + if self.closed: + return b"" + return io.BytesIO.readline(self, length) + + def close(self): + self.io_refs -= 1 + if self.io_refs == 0: + io.BytesIO.close(self) + + class FakeHTTPConnection(http_client.HTTPConnection): + + # buffer to store data for verification in urlopen tests. + buf = None + + def connect(self): + self.sock = FakeSocket(fakedata) + + self._connection_class = http_client.HTTPConnection + http_client.HTTPConnection = FakeHTTPConnection + + def unfakehttp(self): + http_client.HTTPConnection = self._connection_class + + +class urlopen_FileTests(unittest.TestCase): + """Test urlopen() opening a temporary file. + + Try to test as much functionality as possible so as to cut down on reliance + on connecting to the Net for testing. 
+ + """ + + def setUp(self): + # Create a temp file to use for testing + self.text = bytes("test_urllib: %s\n" % self.__class__.__name__, + "ascii") + f = open(support.TESTFN, 'wb') + try: + f.write(self.text) + finally: + f.close() + self.pathname = support.TESTFN + self.returned_obj = urlopen("file:%s" % urllib_parse.quote(self.pathname)) + + def tearDown(self): + """Shut down the open object""" + self.returned_obj.close() + os.remove(support.TESTFN) + + def test_interface(self): + # Make sure object returned by urlopen() has the specified methods + for attr in ("read", "readline", "readlines", "fileno", + "close", "info", "geturl", "getcode", "__iter__"): + self.assertTrue(hasattr(self.returned_obj, attr), + "object returned by urlopen() lacks %s attribute" % + attr) + + def test_read(self): + self.assertEqual(self.text, self.returned_obj.read()) + + def test_readline(self): + self.assertEqual(self.text, self.returned_obj.readline()) + self.assertEqual(b'', self.returned_obj.readline(), + "calling readline() after exhausting the file did not" + " return an empty string") + + def test_readlines(self): + lines_list = self.returned_obj.readlines() + self.assertEqual(len(lines_list), 1, + "readlines() returned the wrong number of lines") + self.assertEqual(lines_list[0], self.text, + "readlines() returned improper text") + + def test_fileno(self): + file_num = self.returned_obj.fileno() + self.assertIsInstance(file_num, int, "fileno() did not return an int") + self.assertEqual(os.read(file_num, len(self.text)), self.text, + "Reading on the file descriptor returned by fileno() " + "did not return the expected text") + + def test_close(self): + # Test close() by calling it here and then having it be called again + # by the tearDown() method for the test + self.returned_obj.close() + + def test_info(self): + self.assertIsInstance(self.returned_obj.info(), email_message.Message) + + def test_geturl(self): + self.assertEqual(self.returned_obj.geturl(), 
urllib_parse.quote(self.pathname)) + + def test_getcode(self): + self.assertIsNone(self.returned_obj.getcode()) + + def test_iter(self): + # Test iterator + # Don't need to count number of iterations since test would fail the + # instant it returned anything beyond the first line from the + # comparison. + # Use the iterator in the usual implicit way to test for ticket #4608. + for line in self.returned_obj: + self.assertEqual(line, self.text) + + def test_relativelocalfile(self): + self.assertRaises(ValueError,urllib_request.urlopen,'./' + self.pathname) + +class ProxyTests(unittest.TestCase): + + def setUp(self): + # Records changes to env vars + self.env = support.EnvironmentVarGuard() + # Delete all proxy related env vars + for k in list(os.environ): + if 'proxy' in k.lower(): + self.env.unset(k) + + def tearDown(self): + # Restore all proxy related env vars + self.env.__exit__() + del self.env + + def test_getproxies_environment_keep_no_proxies(self): + self.env.set('NO_PROXY', 'localhost') + proxies = urllib_request.getproxies_environment() + # getproxies_environment use lowered case truncated (no '_proxy') keys + self.assertEqual('localhost', proxies['no']) + # List of no_proxies with space. + self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com') + self.assertTrue(urllib_request.proxy_bypass_environment('anotherdomain.com')) + +class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin): + """Test urlopen() opening a fake http connection.""" + + def check_read(self, ver): + self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!") + try: + fp = urlopen("http://python.org/") + self.assertEqual(fp.readline(), b"Hello!") + self.assertEqual(fp.readline(), b"") + self.assertEqual(fp.geturl(), 'http://python.org/') + self.assertEqual(fp.getcode(), 200) + finally: + self.unfakehttp() + + @unittest.skip('skipping test that uses https') + def test_url_fragment(self): + # Issue #11703: geturl() omits fragments in the original URL. 
+ url = 'http://docs.python.org/library/urllib.html#OK' + self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!") + try: + fp = urllib_request.urlopen(url) + self.assertEqual(fp.geturl(), url) + finally: + self.unfakehttp() + + @unittest.skip('skipping test that uses https') + def test_willclose(self): + self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!") + try: + resp = urlopen("http://www.python.org") + self.assertTrue(resp.fp.will_close) + finally: + self.unfakehttp() + + @expectedFailurePY2 + def test_read_0_9(self): + # "0.9" response accepted (but not "simple responses" without + # a status line) + self.check_read(b"0.9") + + @expectedFailurePY2 + def test_read_1_0(self): + self.check_read(b"1.0") + + @expectedFailurePY2 + def test_read_1_1(self): + self.check_read(b"1.1") + + @expectedFailurePY2 + def test_read_bogus(self): + # urlopen() should raise IOError for many error codes. + self.fakehttp(b'''HTTP/1.1 401 Authentication Required +Date: Wed, 02 Jan 2008 03:03:54 GMT +Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e +Connection: close +Content-Type: text/html; charset=iso-8859-1 +''') + try: + self.assertRaises(OSError, urlopen, "http://python.org/") + finally: + self.unfakehttp() + + @unittest.skip('skipping test that uses https') + def test_invalid_redirect(self): + # urlopen() should raise IOError for many error codes. + self.fakehttp(b'''HTTP/1.1 302 Found +Date: Wed, 02 Jan 2008 03:03:54 GMT +Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e +Location: file://guidocomputer.athome.com:/python/license +Connection: close +Content-Type: text/html; charset=iso-8859-1 +''') + try: + self.assertRaises(urllib_error.HTTPError, urlopen, + "http://python.org/") + finally: + self.unfakehttp() + + def test_empty_socket(self): + # urlopen() raises IOError if the underlying socket does not send any + # data. 
(#1680230) + self.fakehttp(b'') + try: + self.assertRaises(IOError, urlopen, "http://something") + finally: + self.unfakehttp() + + def test_missing_localfile(self): + # Test for #10836 + # 3.3 - URLError is not captured, explicit IOError is raised. + with self.assertRaises(IOError): + urlopen('file://localhost/a/file/which/doesnot/exists.py') + + def test_file_notexists(self): + fd, tmp_file = tempfile.mkstemp() + tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/') + try: + self.assertTrue(os.path.exists(tmp_file)) + with urlopen(tmp_fileurl) as fobj: + self.assertTrue(fobj) + finally: + os.close(fd) + os.unlink(tmp_file) + self.assertFalse(os.path.exists(tmp_file)) + # 3.3 - IOError instead of URLError + with self.assertRaises(IOError): + urlopen(tmp_fileurl) + + def test_ftp_nohost(self): + test_ftp_url = 'ftp:///path' + # 3.3 - IOError instead of URLError + with self.assertRaises(IOError): + urlopen(test_ftp_url) + + def test_ftp_nonexisting(self): + # 3.3 - IOError instead of URLError + with self.assertRaises(IOError): + urlopen('ftp://localhost/a/file/which/doesnot/exists.py') + + + @expectedFailurePY2 + def test_userpass_inurl(self): + self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!") + try: + fp = urlopen("http://user:pass@python.org/") + self.assertEqual(fp.readline(), b"Hello!") + self.assertEqual(fp.readline(), b"") + self.assertEqual(fp.geturl(), 'http://user:pass@python.org/') + self.assertEqual(fp.getcode(), 200) + finally: + self.unfakehttp() + + @expectedFailurePY2 + def test_userpass_inurl_w_spaces(self): + self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!") + try: + userpass = "a b:c d" + url = "http://{0}@python.org/".format(userpass) + fakehttp_wrapper = http_client.HTTPConnection + authorization = ("Authorization: Basic %s\r\n" % + b64encode(userpass.encode("ASCII")).decode("ASCII")) + fp = urlopen(url) + # The authorization header must be in place + self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8")) + 
self.assertEqual(fp.readline(), b"Hello!") + self.assertEqual(fp.readline(), b"") + # the spaces are quoted in URL so no match + self.assertNotEqual(fp.geturl(), url) + self.assertEqual(fp.getcode(), 200) + finally: + self.unfakehttp() + + def test_URLopener_deprecation(self): + with support.check_warnings(('',DeprecationWarning)): + urllib_request.URLopener() + +class urlretrieve_FileTests(unittest.TestCase): + """Test urllib.urlretrieve() on local files""" + + def setUp(self): + # Create a list of temporary files. Each item in the list is a file + # name (absolute path or relative to the current working directory). + # All files in this list will be deleted in the tearDown method. Note, + # this only helps to makes sure temporary files get deleted, but it + # does nothing about trying to close files that may still be open. It + # is the responsibility of the developer to properly close files even + # when exceptional conditions occur. + self.tempFiles = [] + + # Create a temporary file. + self.registerFileForCleanUp(support.TESTFN) + self.text = b'testing urllib.urlretrieve' + try: + FILE = open(support.TESTFN, 'wb') + FILE.write(self.text) + FILE.close() + finally: + try: FILE.close() + except: pass + + def tearDown(self): + # Delete the temporary files. 
+ for each in self.tempFiles: + try: os.remove(each) + except: pass + + def constructLocalFileUrl(self, filePath): + filePath = os.path.abspath(filePath) + try: + filePath.encode("utf-8") + except UnicodeEncodeError: + raise unittest.SkipTest("filePath is not encodable to utf8") + return "file://%s" % urllib_request.pathname2url(filePath) + + def createNewTempFile(self, data=b""): + """Creates a new temporary file containing the specified data, + registers the file for deletion during the test fixture tear down, and + returns the absolute path of the file.""" + + newFd, newFilePath = tempfile.mkstemp() + try: + self.registerFileForCleanUp(newFilePath) + newFile = os.fdopen(newFd, "wb") + newFile.write(data) + newFile.close() + finally: + try: newFile.close() + except: pass + return newFilePath + + def registerFileForCleanUp(self, fileName): + self.tempFiles.append(fileName) + + def test_basic(self): + # Make sure that a local file just gets its own location returned and + # a headers value is returned. + result = urllib_request.urlretrieve("file:%s" % support.TESTFN) + self.assertEqual(result[0], support.TESTFN) + self.assertIsInstance(result[1], email_message.Message, + "did not get a email.message.Message instance " + "as second returned value") + + def test_copy(self): + # Test that setting the filename argument works. + second_temp = "%s.2" % support.TESTFN + self.registerFileForCleanUp(second_temp) + result = urllib_request.urlretrieve(self.constructLocalFileUrl( + support.TESTFN), second_temp) + self.assertEqual(second_temp, result[0]) + self.assertTrue(os.path.exists(second_temp), "copy of the file was not " + "made") + FILE = open(second_temp, 'rb') + try: + text = FILE.read() + FILE.close() + finally: + try: FILE.close() + except: pass + self.assertEqual(self.text, text) + + def test_reporthook(self): + # Make sure that the reporthook works. 
+ def hooktester(block_count, block_read_size, file_size, count_holder=[0]): + self.assertIsInstance(block_count, int) + self.assertIsInstance(block_read_size, int) + self.assertIsInstance(file_size, int) + self.assertEqual(block_count, count_holder[0]) + count_holder[0] = count_holder[0] + 1 + second_temp = "%s.2" % support.TESTFN + self.registerFileForCleanUp(second_temp) + urllib_request.urlretrieve( + self.constructLocalFileUrl(support.TESTFN), + second_temp, hooktester) + + def test_reporthook_0_bytes(self): + # Test on zero length file. Should call reporthook only 1 time. + report = [] + def hooktester(block_count, block_read_size, file_size, _report=report): + _report.append((block_count, block_read_size, file_size)) + srcFileName = self.createNewTempFile() + urllib_request.urlretrieve(self.constructLocalFileUrl(srcFileName), + support.TESTFN, hooktester) + self.assertEqual(len(report), 1) + self.assertEqual(report[0][2], 0) + + def test_reporthook_5_bytes(self): + # Test on 5 byte file. Should call reporthook only 2 times (once when + # the "network connection" is established and once when the block is + # read). + report = [] + def hooktester(block_count, block_read_size, file_size, _report=report): + _report.append((block_count, block_read_size, file_size)) + srcFileName = self.createNewTempFile(b"x" * 5) + urllib_request.urlretrieve(self.constructLocalFileUrl(srcFileName), + support.TESTFN, hooktester) + self.assertEqual(len(report), 2) + self.assertEqual(report[0][2], 5) + self.assertEqual(report[1][2], 5) + + def test_reporthook_8193_bytes(self): + # Test on 8193 byte file. Should call reporthook only 3 times (once + # when the "network connection" is established, once for the next 8192 + # bytes, and once for the last byte). 
+ report = [] + def hooktester(block_count, block_read_size, file_size, _report=report): + _report.append((block_count, block_read_size, file_size)) + srcFileName = self.createNewTempFile(b"x" * 8193) + urllib_request.urlretrieve(self.constructLocalFileUrl(srcFileName), + support.TESTFN, hooktester) + self.assertEqual(len(report), 3) + self.assertEqual(report[0][2], 8193) + self.assertEqual(report[0][1], 8192) + self.assertEqual(report[1][1], 8192) + self.assertEqual(report[2][1], 8192) + + +class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin): + """Test urllib.urlretrieve() using fake http connections""" + + @expectedFailurePY2 + def test_short_content_raises_ContentTooShortError(self): + self.fakehttp(b'''HTTP/1.1 200 OK +Date: Wed, 02 Jan 2008 03:03:54 GMT +Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e +Connection: close +Content-Length: 100 +Content-Type: text/html; charset=iso-8859-1 + +FF +''') + + def _reporthook(par1, par2, par3): + pass + + with self.assertRaises(urllib_error.ContentTooShortError): + try: + urllib_request.urlretrieve('http://example.com/', + reporthook=_reporthook) + finally: + self.unfakehttp() + + @expectedFailurePY2 + def test_short_content_raises_ContentTooShortError_without_reporthook(self): + self.fakehttp(b'''HTTP/1.1 200 OK +Date: Wed, 02 Jan 2008 03:03:54 GMT +Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e +Connection: close +Content-Length: 100 +Content-Type: text/html; charset=iso-8859-1 + +FF +''') + with self.assertRaises(urllib_error.ContentTooShortError): + try: + urllib_request.urlretrieve('http://example.com/') + finally: + self.unfakehttp() + + +class QuotingTests(unittest.TestCase): + """Tests for urllib.quote() and urllib.quote_plus() + + According to RFC 2396 (Uniform Resource Identifiers), to escape a + character you write it as '%' + <2 character US-ASCII hex value>. + The Python code of ``'%' + hex(ord())[2:]`` escapes a + character properly. 
Case does not matter on the hex letters. + + The various character sets specified are: + + Reserved characters : ";/?:@&=+$," + Have special meaning in URIs and must be escaped if not being used for + their special meaning + Data characters : letters, digits, and "-_.!~*'()" + Unreserved and do not need to be escaped; can be, though, if desired + Control characters : 0x00 - 0x1F, 0x7F + Have no use in URIs so must be escaped + space : 0x20 + Must be escaped + Delimiters : '<>#%"' + Must be escaped + Unwise : "{}|\^[]`" + Must be escaped + + """ + + def test_never_quote(self): + # Make sure quote() does not quote letters, digits, and "_,.-" + do_not_quote = '' .join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ", + "abcdefghijklmnopqrstuvwxyz", + "0123456789", + "_.-"]) + result = urllib_parse.quote(do_not_quote) + self.assertEqual(do_not_quote, result, + "using quote(): %r != %r" % (do_not_quote, result)) + result = urllib_parse.quote_plus(do_not_quote) + self.assertEqual(do_not_quote, result, + "using quote_plus(): %r != %r" % (do_not_quote, result)) + + def test_default_safe(self): + # Test '/' is default value for 'safe' parameter + self.assertEqual(urllib_parse.quote.__defaults__[0], '/') + + def test_safe(self): + # Test setting 'safe' parameter does what it should do + quote_by_default = "<>" + result = urllib_parse.quote(quote_by_default, safe=quote_by_default) + self.assertEqual(quote_by_default, result, + "using quote(): %r != %r" % (quote_by_default, result)) + result = urllib_parse.quote_plus(quote_by_default, + safe=quote_by_default) + self.assertEqual(quote_by_default, result, + "using quote_plus(): %r != %r" % + (quote_by_default, result)) + # Safe expressed as bytes rather than str + result = urllib_parse.quote(quote_by_default, safe=b"<>") + self.assertEqual(quote_by_default, result, + "using quote(): %r != %r" % (quote_by_default, result)) + # "Safe" non-ASCII characters should have no effect + # (Since URIs are not allowed to have non-ASCII characters) + result = 
urllib_parse.quote("a\xfcb", encoding="latin-1", safe="\xfc") + expect = urllib_parse.quote("a\xfcb", encoding="latin-1", safe="") + self.assertEqual(expect, result, + "using quote(): %r != %r" % + (expect, result)) + # Same as above, but using a bytes rather than str + result = urllib_parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc") + expect = urllib_parse.quote("a\xfcb", encoding="latin-1", safe="") + self.assertEqual(expect, result, + "using quote(): %r != %r" % + (expect, result)) + + def test_default_quoting(self): + # Make sure all characters that should be quoted are by default sans + # space (separate test for that). + should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F + should_quote.append('<>#%"{}|\^[]`') + should_quote.append(chr(127)) # For 0x7F + should_quote = ''.join(should_quote) + for char in should_quote: + result = urllib_parse.quote(char) + self.assertEqual(hexescape(char), result, + "using quote(): " + "%s should be escaped to %s, not %s" % + (char, hexescape(char), result)) + result = urllib_parse.quote_plus(char) + self.assertEqual(hexescape(char), result, + "using quote_plus(): " + "%s should be escapes to %s, not %s" % + (char, hexescape(char), result)) + del should_quote + partial_quote = "ab[]cd" + expected = "ab%5B%5Dcd" + result = urllib_parse.quote(partial_quote) + self.assertEqual(expected, result, + "using quote(): %r != %r" % (expected, result)) + result = urllib_parse.quote_plus(partial_quote) + self.assertEqual(expected, result, + "using quote_plus(): %r != %r" % (expected, result)) + + def test_quoting_space(self): + # Make sure quote() and quote_plus() handle spaces as specified in + # their unique way + result = urllib_parse.quote(' ') + self.assertEqual(result, hexescape(' '), + "using quote(): %r != %r" % (result, hexescape(' '))) + result = urllib_parse.quote_plus(' ') + self.assertEqual(result, '+', + "using quote_plus(): %r != +" % result) + given = "a b cd e f" + expect = given.replace(' ', 
hexescape(' ')) + result = urllib_parse.quote(given) + self.assertEqual(expect, result, + "using quote(): %r != %r" % (expect, result)) + expect = given.replace(' ', '+') + result = urllib_parse.quote_plus(given) + self.assertEqual(expect, result, + "using quote_plus(): %r != %r" % (expect, result)) + + def test_quoting_plus(self): + self.assertEqual(urllib_parse.quote_plus('alpha+beta gamma'), + 'alpha%2Bbeta+gamma') + self.assertEqual(urllib_parse.quote_plus('alpha+beta gamma', '+'), + 'alpha+beta+gamma') + # Test with bytes + self.assertEqual(urllib_parse.quote_plus(b'alpha+beta gamma'), + 'alpha%2Bbeta+gamma') + # Test with safe bytes + self.assertEqual(urllib_parse.quote_plus('alpha+beta gamma', b'+'), + 'alpha+beta+gamma') + + def test_quote_bytes(self): + # Bytes should quote directly to percent-encoded values + given = b"\xa2\xd8ab\xff" + expect = "%A2%D8ab%FF" + result = urllib_parse.quote(given) + self.assertEqual(expect, result, + "using quote(): %r != %r" % (expect, result)) + # Encoding argument should raise type error on bytes input + self.assertRaises(TypeError, urllib_parse.quote, given, + encoding="latin-1") + # quote_from_bytes should work the same + result = urllib_parse.quote_from_bytes(given) + self.assertEqual(expect, result, + "using quote_from_bytes(): %r != %r" + % (expect, result)) + + def test_quote_with_unicode(self): + # Characters in Latin-1 range, encoded by default in UTF-8 + given = "\xa2\xd8ab\xff" + expect = "%C2%A2%C3%98ab%C3%BF" + result = urllib_parse.quote(given) + self.assertEqual(expect, result, + "using quote(): %r != %r" % (expect, result)) + # Characters in Latin-1 range, encoded by with None (default) + result = urllib_parse.quote(given, encoding=None, errors=None) + self.assertEqual(expect, result, + "using quote(): %r != %r" % (expect, result)) + # Characters in Latin-1 range, encoded with Latin-1 + given = "\xa2\xd8ab\xff" + expect = "%A2%D8ab%FF" + result = urllib_parse.quote(given, encoding="latin-1") + 
self.assertEqual(expect, result, + "using quote(): %r != %r" % (expect, result)) + # Characters in BMP, encoded by default in UTF-8 + given = "\u6f22\u5b57" # "Kanji" + expect = "%E6%BC%A2%E5%AD%97" + result = urllib_parse.quote(given) + self.assertEqual(expect, result, + "using quote(): %r != %r" % (expect, result)) + # Characters in BMP, encoded with Latin-1 + given = "\u6f22\u5b57" + self.assertRaises(UnicodeEncodeError, urllib_parse.quote, given, + encoding="latin-1") + # Characters in BMP, encoded with Latin-1, with replace error handling + given = "\u6f22\u5b57" + expect = "%3F%3F" # "??" + result = urllib_parse.quote(given, encoding="latin-1", + errors="replace") + self.assertEqual(expect, result, + "using quote(): %r != %r" % (expect, result)) + # Characters in BMP, Latin-1, with xmlcharref error handling + given = "\u6f22\u5b57" + expect = "%26%2328450%3B%26%2323383%3B" # "漢字" + result = urllib_parse.quote(given, encoding="latin-1", + errors="xmlcharrefreplace") + self.assertEqual(expect, result, + "using quote(): %r != %r" % (expect, result)) + + def test_quote_plus_with_unicode(self): + # Encoding (latin-1) test for quote_plus + given = "\xa2\xd8 \xff" + expect = "%A2%D8+%FF" + result = urllib_parse.quote_plus(given, encoding="latin-1") + self.assertEqual(expect, result, + "using quote_plus(): %r != %r" % (expect, result)) + # Errors test for quote_plus + given = "ab\u6f22\u5b57 cd" + expect = "ab%3F%3F+cd" + result = urllib_parse.quote_plus(given, encoding="latin-1", + errors="replace") + self.assertEqual(expect, result, + "using quote_plus(): %r != %r" % (expect, result)) + + +class UnquotingTests(unittest.TestCase): + """Tests for unquote() and unquote_plus() + + See the doc string for quoting_Tests for details on quoting and such. 
+ + """ + + def test_unquoting(self): + # Make sure unquoting of all ASCII values works + escape_list = [] + for num in range(128): + given = hexescape(chr(num)) + expect = chr(num) + result = urllib_parse.unquote(given) + self.assertEqual(expect, result, + "using unquote(): %r != %r" % (expect, result)) + result = urllib_parse.unquote_plus(given) + self.assertEqual(expect, result, + "using unquote_plus(): %r != %r" % + (expect, result)) + escape_list.append(given) + escape_string = ''.join(escape_list) + del escape_list + result = urllib_parse.unquote(escape_string) + self.assertEqual(result.count('%'), 1, + "using unquote(): not all characters escaped: " + "%s" % result) + self.assertRaises((TypeError, AttributeError), urllib_parse.unquote, None) + self.assertRaises((TypeError, AttributeError), urllib_parse.unquote, ()) + if sys.version_info[:2] < (3, 9): + with support.check_warnings(('', BytesWarning), quiet=True): + self.assertRaises((TypeError, AttributeError), urllib_parse.unquote, bytes(b'')) + else: + self.assertEqual(urllib_parse.unquote(bytes(b"")), "") + + def test_unquoting_badpercent(self): + # Test unquoting on bad percent-escapes + given = '%xab' + expect = given + result = urllib_parse.unquote(given) + self.assertEqual(expect, result, "using unquote(): %r != %r" + % (expect, result)) + given = '%x' + expect = given + result = urllib_parse.unquote(given) + self.assertEqual(expect, result, "using unquote(): %r != %r" + % (expect, result)) + given = '%' + expect = given + result = urllib_parse.unquote(given) + self.assertEqual(expect, result, "using unquote(): %r != %r" + % (expect, result)) + # unquote_to_bytes + given = '%xab' + expect = bytes(given, 'ascii') + result = urllib_parse.unquote_to_bytes(given) + self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" + % (expect, result)) + given = '%x' + expect = bytes(given, 'ascii') + result = urllib_parse.unquote_to_bytes(given) + self.assertEqual(expect, result, "using 
unquote_to_bytes(): %r != %r" + % (expect, result)) + given = '%' + expect = bytes(given, 'ascii') + result = urllib_parse.unquote_to_bytes(given) + self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" + % (expect, result)) + self.assertRaises((TypeError, AttributeError), urllib_parse.unquote_to_bytes, None) + self.assertRaises((TypeError, AttributeError), urllib_parse.unquote_to_bytes, ()) + + def test_unquoting_mixed_case(self): + # Test unquoting on mixed-case hex digits in the percent-escapes + given = '%Ab%eA' + expect = b'\xab\xea' + result = urllib_parse.unquote_to_bytes(given) + self.assertEqual(expect, result, + "using unquote_to_bytes(): %r != %r" + % (expect, result)) + + def test_unquoting_parts(self): + # Make sure unquoting works when have non-quoted characters + # interspersed + given = 'ab%sd' % hexescape('c') + expect = "abcd" + result = urllib_parse.unquote(given) + self.assertEqual(expect, result, + "using quote(): %r != %r" % (expect, result)) + result = urllib_parse.unquote_plus(given) + self.assertEqual(expect, result, + "using unquote_plus(): %r != %r" % (expect, result)) + + def test_unquoting_plus(self): + # Test difference between unquote() and unquote_plus() + given = "are+there+spaces..." + expect = given + result = urllib_parse.unquote(given) + self.assertEqual(expect, result, + "using unquote(): %r != %r" % (expect, result)) + expect = given.replace('+', ' ') + result = urllib_parse.unquote_plus(given) + self.assertEqual(expect, result, + "using unquote_plus(): %r != %r" % (expect, result)) + + def test_unquote_to_bytes(self): + given = 'br%C3%BCckner_sapporo_20050930.doc' + expect = b'br\xc3\xbcckner_sapporo_20050930.doc' + result = urllib_parse.unquote_to_bytes(given) + self.assertEqual(expect, result, + "using unquote_to_bytes(): %r != %r" + % (expect, result)) + # Test on a string with unescaped non-ASCII characters + # (Technically an invalid URI; expect those characters to be UTF-8 + # encoded). 
+ result = urllib_parse.unquote_to_bytes("\u6f22%C3%BC") + expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc" + self.assertEqual(expect, result, + "using unquote_to_bytes(): %r != %r" + % (expect, result)) + # Test with a bytes as input + given = b'%A2%D8ab%FF' + expect = b'\xa2\xd8ab\xff' + result = urllib_parse.unquote_to_bytes(given) + self.assertEqual(expect, result, + "using unquote_to_bytes(): %r != %r" + % (expect, result)) + # Test with a bytes as input, with unescaped non-ASCII bytes + # (Technically an invalid URI; expect those bytes to be preserved) + given = b'%A2\xd8ab%FF' + expect = b'\xa2\xd8ab\xff' + result = urllib_parse.unquote_to_bytes(given) + self.assertEqual(expect, result, + "using unquote_to_bytes(): %r != %r" + % (expect, result)) + + def test_unquote_with_unicode(self): + # Characters in the Latin-1 range, encoded with UTF-8 + given = 'br%C3%BCckner_sapporo_20050930.doc' + expect = 'br\u00fcckner_sapporo_20050930.doc' + result = urllib_parse.unquote(given) + self.assertEqual(expect, result, + "using unquote(): %r != %r" % (expect, result)) + # Characters in the Latin-1 range, encoded with None (default) + result = urllib_parse.unquote(given, encoding=None, errors=None) + self.assertEqual(expect, result, + "using unquote(): %r != %r" % (expect, result)) + + # Characters in the Latin-1 range, encoded with Latin-1 + result = urllib_parse.unquote('br%FCckner_sapporo_20050930.doc', + encoding="latin-1") + expect = 'br\u00fcckner_sapporo_20050930.doc' + self.assertEqual(expect, result, + "using unquote(): %r != %r" % (expect, result)) + + # Characters in BMP, encoded with UTF-8 + given = "%E6%BC%A2%E5%AD%97" + expect = "\u6f22\u5b57" # "Kanji" + result = urllib_parse.unquote(given) + self.assertEqual(expect, result, + "using unquote(): %r != %r" % (expect, result)) + + # Decode with UTF-8, invalid sequence + given = "%F3%B1" + expect = "\ufffd" # Replacement character + result = urllib_parse.unquote(given) + self.assertEqual(expect, 
result, + "using unquote(): %r != %r" % (expect, result)) + + # Decode with UTF-8, invalid sequence, replace errors + result = urllib_parse.unquote(given, errors="replace") + self.assertEqual(expect, result, + "using unquote(): %r != %r" % (expect, result)) + + # Decode with UTF-8, invalid sequence, ignoring errors + given = "%F3%B1" + expect = "" + result = urllib_parse.unquote(given, errors="ignore") + self.assertEqual(expect, result, + "using unquote(): %r != %r" % (expect, result)) + + # A mix of non-ASCII and percent-encoded characters, UTF-8 + result = urllib_parse.unquote("\u6f22%C3%BC") + expect = '\u6f22\u00fc' + self.assertEqual(expect, result, + "using unquote(): %r != %r" % (expect, result)) + + # A mix of non-ASCII and percent-encoded characters, Latin-1 + # (Note, the string contains non-Latin-1-representable characters) + result = urllib_parse.unquote("\u6f22%FC", encoding="latin-1") + expect = '\u6f22\u00fc' + self.assertEqual(expect, result, + "using unquote(): %r != %r" % (expect, result)) + +class urlencode_Tests(unittest.TestCase): + """Tests for urlencode()""" + + def help_inputtype(self, given, test_type): + """Helper method for testing different input types. + + 'given' must lead to only the pairs: + * 1st, 1 + * 2nd, 2 + * 3rd, 3 + + Test cannot assume anything about order. Docs make no guarantee and + have possible dictionary input. 
+ + """ + expect_somewhere = ["1st=1", "2nd=2", "3rd=3"] + result = urllib_parse.urlencode(given) + for expected in expect_somewhere: + self.assertIn(expected, result, + "testing %s: %s not found in %s" % + (test_type, expected, result)) + self.assertEqual(result.count('&'), 2, + "testing %s: expected 2 '&'s; got %s" % + (test_type, result.count('&'))) + amp_location = result.index('&') + on_amp_left = result[amp_location - 1] + on_amp_right = result[amp_location + 1] + self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(), + "testing %s: '&' not located in proper place in %s" % + (test_type, result)) + self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps + "testing %s: " + "unexpected number of characters: %s != %s" % + (test_type, len(result), (5 * 3) + 2)) + + def test_using_mapping(self): + # Test passing in a mapping object as an argument. + self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'}, + "using dict as input type") + + def test_using_sequence(self): + # Test passing in a sequence of two-item sequences as an argument. 
+ self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')], + "using sequence of two-item tuples as input") + + def test_quoting(self): + # Make sure keys and values are quoted using quote_plus() + given = {"&":"="} + expect = "%s=%s" % (hexescape('&'), hexescape('=')) + result = urllib_parse.urlencode(given) + self.assertEqual(expect, result) + given = {"key name":"A bunch of pluses"} + expect = "key+name=A+bunch+of+pluses" + result = urllib_parse.urlencode(given) + self.assertEqual(expect, result) + + def test_doseq(self): + # Test that passing True for 'doseq' parameter works correctly + given = {'sequence':['1', '2', '3']} + expect = "sequence=%s" % urllib_parse.quote_plus(str(['1', '2', '3'])) + result = urllib_parse.urlencode(given) + self.assertEqual(expect, result) + result = urllib_parse.urlencode(given, True) + for value in given["sequence"]: + expect = "sequence=%s" % value + self.assertIn(expect, result) + self.assertEqual(result.count('&'), 2, + "Expected 2 '&'s, got %s" % result.count('&')) + + def test_empty_sequence(self): + self.assertEqual("", urllib_parse.urlencode({})) + self.assertEqual("", urllib_parse.urlencode([])) + + def test_nonstring_values(self): + self.assertEqual("a=1", urllib_parse.urlencode({"a": 1})) + self.assertEqual("a=None", urllib_parse.urlencode({"a": None})) + + def test_nonstring_seq_values(self): + from future.backports import OrderedDict # for Py2.6 + self.assertEqual("a=1&a=2", urllib_parse.urlencode({"a": [1, 2]}, True)) + self.assertEqual("a=None&a=a", + urllib_parse.urlencode({"a": [None, "a"]}, True)) + data = OrderedDict([("a", 1), ("b", 1)]) + self.assertEqual("a=a&a=b", + urllib_parse.urlencode({"a": data}, True)) + + def test_urlencode_encoding(self): + # ASCII encoding. Expect %3F with errors="replace' + given = (('\u00a0', '\u00c1'),) + expect = '%3F=%3F' + result = urllib_parse.urlencode(given, encoding="ASCII", errors="replace") + self.assertEqual(expect, result) + + # Default is UTF-8 encoding. 
+ given = (('\u00a0', '\u00c1'),) + expect = '%C2%A0=%C3%81' + result = urllib_parse.urlencode(given) + self.assertEqual(expect, result) + + # Latin-1 encoding. + given = (('\u00a0', '\u00c1'),) + expect = '%A0=%C1' + result = urllib_parse.urlencode(given, encoding="latin-1") + self.assertEqual(expect, result) + + def test_urlencode_encoding_doseq(self): + # ASCII Encoding. Expect %3F with errors="replace' + given = (('\u00a0', '\u00c1'),) + expect = '%3F=%3F' + result = urllib_parse.urlencode(given, doseq=True, + encoding="ASCII", errors="replace") + self.assertEqual(expect, result) + + # ASCII Encoding. On a sequence of values. + given = (("\u00a0", (1, "\u00c1")),) + expect = '%3F=1&%3F=%3F' + result = urllib_parse.urlencode(given, True, + encoding="ASCII", errors="replace") + self.assertEqual(expect, result) + + # Utf-8 + given = (("\u00a0", "\u00c1"),) + expect = '%C2%A0=%C3%81' + result = urllib_parse.urlencode(given, True) + self.assertEqual(expect, result) + + given = (("\u00a0", (42, "\u00c1")),) + expect = '%C2%A0=42&%C2%A0=%C3%81' + result = urllib_parse.urlencode(given, True) + self.assertEqual(expect, result) + + # latin-1 + given = (("\u00a0", "\u00c1"),) + expect = '%A0=%C1' + result = urllib_parse.urlencode(given, True, encoding="latin-1") + self.assertEqual(expect, result) + + given = (("\u00a0", (42, "\u00c1")),) + expect = '%A0=42&%A0=%C1' + result = urllib_parse.urlencode(given, True, encoding="latin-1") + self.assertEqual(expect, result) + + def test_urlencode_bytes(self): + given = ((b'\xa0\x24', b'\xc1\x24'),) + expect = '%A0%24=%C1%24' + result = urllib_parse.urlencode(given) + self.assertEqual(expect, result) + result = urllib_parse.urlencode(given, True) + self.assertEqual(expect, result) + + # Sequence of values + given = ((b'\xa0\x24', (42, b'\xc1\x24')),) + expect = '%A0%24=42&%A0%24=%C1%24' + result = urllib_parse.urlencode(given, True) + self.assertEqual(expect, result) + + def test_urlencode_encoding_safe_parameter(self): + + # Send 
'$' (\x24) as safe character + # Default utf-8 encoding + + given = ((b'\xa0\x24', b'\xc1\x24'),) + result = urllib_parse.urlencode(given, safe=":$") + expect = '%A0$=%C1$' + self.assertEqual(expect, result) + + given = ((b'\xa0\x24', b'\xc1\x24'),) + result = urllib_parse.urlencode(given, doseq=True, safe=":$") + expect = '%A0$=%C1$' + self.assertEqual(expect, result) + + # Safe parameter in sequence + given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),) + expect = '%A0$=%C1$&%A0$=13&%A0$=42' + result = urllib_parse.urlencode(given, True, safe=":$") + self.assertEqual(expect, result) + + # Test all above in latin-1 encoding + + given = ((b'\xa0\x24', b'\xc1\x24'),) + result = urllib_parse.urlencode(given, safe=":$", + encoding="latin-1") + expect = '%A0$=%C1$' + self.assertEqual(expect, result) + + given = ((b'\xa0\x24', b'\xc1\x24'),) + expect = '%A0$=%C1$' + result = urllib_parse.urlencode(given, doseq=True, safe=":$", + encoding="latin-1") + + given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),) + expect = '%A0$=%C1$&%A0$=13&%A0$=42' + result = urllib_parse.urlencode(given, True, safe=":$", + encoding="latin-1") + self.assertEqual(expect, result) + +class Pathname_Tests(unittest.TestCase): + """Test pathname2url() and url2pathname()""" + + def test_basic(self): + # Make sure simple tests pass + expected_path = os.path.join("parts", "of", "a", "path") + expected_url = "parts/of/a/path" + result = urllib_request.pathname2url(expected_path) + self.assertEqual(expected_url, result, + "pathname2url() failed; %s != %s" % + (result, expected_url)) + result = urllib_request.url2pathname(expected_url) + self.assertEqual(expected_path, result, + "url2pathame() failed; %s != %s" % + (result, expected_path)) + + def test_quoting(self): + # Test automatic quoting and unquoting works for pathnam2url() and + # url2pathname() respectively + given = os.path.join("needs", "quot=ing", "here") + expect = "needs/%s/here" % urllib_parse.quote("quot=ing") + result = 
urllib_request.pathname2url(given) + self.assertEqual(expect, result, + "pathname2url() failed; %s != %s" % + (expect, result)) + expect = given + result = urllib_request.url2pathname(result) + self.assertEqual(expect, result, + "url2pathname() failed; %s != %s" % + (expect, result)) + given = os.path.join("make sure", "using_quote") + expect = "%s/using_quote" % urllib_parse.quote("make sure") + result = urllib_request.pathname2url(given) + self.assertEqual(expect, result, + "pathname2url() failed; %s != %s" % + (expect, result)) + given = "make+sure/using_unquote" + expect = os.path.join("make+sure", "using_unquote") + result = urllib_request.url2pathname(given) + self.assertEqual(expect, result, + "url2pathname() failed; %s != %s" % + (expect, result)) + + @unittest.skipUnless(sys.platform == 'win32', + 'test specific to the urllib.url2path function.') + def test_ntpath(self): + given = ('/C:/', '///C:/', '/C|//') + expect = 'C:\\' + for url in given: + result = urllib_request.url2pathname(url) + self.assertEqual(expect, result, + 'urllib_request..url2pathname() failed; %s != %s' % + (expect, result)) + given = '///C|/path' + expect = 'C:\\path' + result = urllib_request.url2pathname(given) + self.assertEqual(expect, result, + 'urllib_request.url2pathname() failed; %s != %s' % + (expect, result)) + +class Utility_Tests(unittest.TestCase): + """Testcase to test the various utility functions in the urllib.""" + + def test_splitpasswd(self): + """Some of password examples are not sensible, but it is added to + confirming to RFC2617 and addressing issue4675. 
+ """ + self.assertEqual(('user', 'ab'),urllib_parse.splitpasswd('user:ab')) + self.assertEqual(('user', 'a\nb'),urllib_parse.splitpasswd('user:a\nb')) + self.assertEqual(('user', 'a\tb'),urllib_parse.splitpasswd('user:a\tb')) + self.assertEqual(('user', 'a\rb'),urllib_parse.splitpasswd('user:a\rb')) + self.assertEqual(('user', 'a\fb'),urllib_parse.splitpasswd('user:a\fb')) + self.assertEqual(('user', 'a\vb'),urllib_parse.splitpasswd('user:a\vb')) + self.assertEqual(('user', 'a:b'),urllib_parse.splitpasswd('user:a:b')) + self.assertEqual(('user', 'a b'),urllib_parse.splitpasswd('user:a b')) + self.assertEqual(('user 2', 'ab'),urllib_parse.splitpasswd('user 2:ab')) + self.assertEqual(('user+1', 'a+b'),urllib_parse.splitpasswd('user+1:a+b')) + + def test_thishost(self): + """Test the urllib_request.thishost utility function returns a tuple""" + self.assertIsInstance(urllib_request.thishost(), tuple) + + +class URLopener_Tests(unittest.TestCase): + """Testcase to test the open method of URLopener class.""" + + def test_quoted_open(self): + class DummyURLopener(urllib_request.URLopener): + def open_spam(self, url): + return url + with support.check_warnings( + ('DummyURLopener style of invoking requests is deprecated.', + DeprecationWarning)): + self.assertEqual(DummyURLopener().open( + 'spam://example/ /'),'//example/%20/') + + # test the safe characters are not quoted by urlopen + self.assertEqual(DummyURLopener().open( + "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"), + "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/") + +# Just commented them out. +# Can't really tell why keep failing in windows and sparc. +# Everywhere else they work ok, but on those machines, sometimes +# fail in one of the tests, sometimes in other. I have a linux, and +# the tests go ok. +# If anybody has one of the problematic environments, please help! +# . 
Facundo +# +# def server(evt): +# import socket, time +# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +# serv.settimeout(3) +# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) +# serv.bind(("", 9093)) +# serv.listen(5) +# try: +# conn, addr = serv.accept() +# conn.send("1 Hola mundo\n") +# cantdata = 0 +# while cantdata < 13: +# data = conn.recv(13-cantdata) +# cantdata += len(data) +# time.sleep(.3) +# conn.send("2 No more lines\n") +# conn.close() +# except socket.timeout: +# pass +# finally: +# serv.close() +# evt.set() +# +# class FTPWrapperTests(unittest.TestCase): +# +# def setUp(self): +# import ftplib, time, threading +# ftplib.FTP.port = 9093 +# self.evt = threading.Event() +# threading.Thread(target=server, args=(self.evt,)).start() +# time.sleep(.1) +# +# def tearDown(self): +# self.evt.wait() +# +# def testBasic(self): +# # connects +# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, []) +# ftp.close() +# +# def testTimeoutNone(self): +# # global default timeout is ignored +# import socket +# self.assertTrue(socket.getdefaulttimeout() is None) +# socket.setdefaulttimeout(30) +# try: +# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, []) +# finally: +# socket.setdefaulttimeout(None) +# self.assertEqual(ftp.ftp.sock.gettimeout(), 30) +# ftp.close() +# +# def testTimeoutDefault(self): +# # global default timeout is used +# import socket +# self.assertTrue(socket.getdefaulttimeout() is None) +# socket.setdefaulttimeout(30) +# try: +# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, []) +# finally: +# socket.setdefaulttimeout(None) +# self.assertEqual(ftp.ftp.sock.gettimeout(), 30) +# ftp.close() +# +# def testTimeoutValue(self): +# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [], +# timeout=30) +# self.assertEqual(ftp.ftp.sock.gettimeout(), 30) +# ftp.close() + +class RequestTests(unittest.TestCase): + """Unit tests for urllib_request.Request.""" + + def 
test_default_values(self): + Request = urllib_request.Request + request = Request("http://www.python.org") + self.assertEqual(request.get_method(), 'GET') + request = Request("http://www.python.org", {}) + self.assertEqual(request.get_method(), 'POST') + + def test_with_method_arg(self): + Request = urllib_request.Request + request = Request("http://www.python.org", method='HEAD') + self.assertEqual(request.method, 'HEAD') + self.assertEqual(request.get_method(), 'HEAD') + request = Request("http://www.python.org", {}, method='HEAD') + self.assertEqual(request.method, 'HEAD') + self.assertEqual(request.get_method(), 'HEAD') + request = Request("http://www.python.org", method='GET') + self.assertEqual(request.get_method(), 'GET') + request.method = 'HEAD' + self.assertEqual(request.get_method(), 'HEAD') + + +class URL2PathNameTests(unittest.TestCase): + + @expectedFailurePY26 + def test_converting_drive_letter(self): + self.assertEqual(url2pathname("///C|"), 'C:') + self.assertEqual(url2pathname("///C:"), 'C:') + self.assertEqual(url2pathname("///C|/"), 'C:\\') + + def test_converting_when_no_drive_letter(self): + # cannot end a raw string in \ + self.assertEqual(url2pathname("///C/test/"), r'\\\C\test' '\\') + self.assertEqual(url2pathname("////C/test/"), r'\\C\test' '\\') + + def test_simple_compare(self): + self.assertEqual(url2pathname("///C|/foo/bar/spam.foo"), + r'C:\foo\bar\spam.foo') + + def test_non_ascii_drive_letter(self): + self.assertRaises(IOError, url2pathname, "///\u00e8|/") + + def test_roundtrip_url2pathname(self): + list_of_paths = ['C:', + r'\\\C\test\\', + r'C:\foo\bar\spam.foo' + ] + for path in list_of_paths: + self.assertEqual(url2pathname(pathname2url(path)), path) + +class PathName2URLTests(unittest.TestCase): + + def test_converting_drive_letter(self): + self.assertEqual(pathname2url("C:"), '///C:') + self.assertEqual(pathname2url("C:\\"), '///C:') + + def test_converting_when_no_drive_letter(self): + 
self.assertEqual(pathname2url(r"\\\folder\test" "\\"), + '/////folder/test/') + self.assertEqual(pathname2url(r"\\folder\test" "\\"), + '////folder/test/') + self.assertEqual(pathname2url(r"\folder\test" "\\"), + '/folder/test/') + + def test_simple_compare(self): + self.assertEqual(pathname2url(r'C:\foo\bar\spam.foo'), + "///C:/foo/bar/spam.foo" ) + + def test_long_drive_letter(self): + self.assertRaises(IOError, pathname2url, "XX:\\") + + def test_roundtrip_pathname2url(self): + list_of_paths = ['///C:', + '/////folder/test/', + '///C:/foo/bar/spam.foo'] + for path in list_of_paths: + self.assertEqual(pathname2url(url2pathname(path)), path) + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_future/test_urllibnet.py b/tests/test_future/test_urllibnet.py new file mode 100644 index 00000000..6a7b6d64 --- /dev/null +++ b/tests/test_future/test_urllibnet.py @@ -0,0 +1,231 @@ +#!/usr/bin/env python3 +from __future__ import absolute_import, division, unicode_literals + +import contextlib +import socket +import sys +import os +import time + +from future import utils +from future.backports.test import support + +import future.moves.urllib.request as urllib_request +# import future.backports.email.message as email_message +# from future.backports.email.message import Message +import email.message as email_message +from email.message import Message + +from future.tests.base import unittest, skip26 +from future.builtins import int, open + + +class URLTimeoutTest(unittest.TestCase): + # XXX this test doesn't seem to test anything useful. + + TIMEOUT = 30.0 + + def setUp(self): + socket.setdefaulttimeout(self.TIMEOUT) + + def tearDown(self): + socket.setdefaulttimeout(None) + + def testURLread(self): + with support.transient_internet("www.python.org"): + f = urllib_request.urlopen("http://www.python.org/") + x = f.read() + + +class urlopenNetworkTests(unittest.TestCase): + """Tests urllib.request.urlopen using the network. 
+ + These tests are not exhaustive. Assuming that testing using files does a + good job overall of some of the basic interface features. There are no + tests exercising the optional 'data' and 'proxies' arguments. No tests + for transparent redirection have been written. + + setUp is not used for always constructing a connection to + http://www.python.org/ since there are a few tests that don't use that address + and making a connection is expensive enough to warrant minimizing unneeded + connections. + + """ + + @contextlib.contextmanager + def urlopen(self, *args, **kwargs): + resource = args[0] + with support.transient_internet(resource): + r = urllib_request.urlopen(*args, **kwargs) + try: + yield r + finally: + r.close() + + def test_basic(self): + # Simple test expected to pass. + with self.urlopen("http://www.python.org/") as open_url: + for attr in ("read", "readline", "readlines", "fileno", "close", + "info", "geturl"): + self.assertTrue(hasattr(open_url, attr), "object returned from " + "urlopen lacks the %s attribute" % attr) + self.assertTrue(open_url.read(), "calling 'read' failed") + + def test_readlines(self): + # Test both readline and readlines. + with self.urlopen("http://www.python.org/") as open_url: + self.assertIsInstance(open_url.readline(), bytes, + "readline did not return a string") + self.assertIsInstance(open_url.readlines(), list, + "readlines did not return a list") + + @unittest.skipIf(utils.PY2, 'test not applicable on Py2') + def test_info(self): + # Test 'info'. + with self.urlopen("http://www.python.org/") as open_url: + info_obj = open_url.info() + self.assertIsInstance(info_obj, email_message.Message, + "object returned by 'info' is not an " + "instance of email_message.Message") + self.assertEqual(info_obj.get_content_subtype(), "html") + + def test_geturl(self): + # Make sure same URL as opened is returned by geturl. + URL = "https://www.python.org/" # EJS: changed recently from http:// ?!
+ with self.urlopen(URL) as open_url: + gotten_url = open_url.geturl() + self.assertEqual(gotten_url, URL) + + def test_getcode(self): + # test getcode() with the fancy opener to get 404 error codes + URL = "http://www.python.org/XXXinvalidXXX" + with support.transient_internet(URL): + open_url = urllib_request.FancyURLopener().open(URL) + try: + code = open_url.getcode() + finally: + open_url.close() + self.assertEqual(code, 404) + + # On Windows, socket handles are not file descriptors; this + # test can't pass on Windows. + @unittest.skipIf(sys.platform in ('darwin', 'win32',), 'not appropriate for Windows') + @unittest.skipIf(utils.PY36_PLUS, 'test not applicable on Python 3.5 and higher') + @skip26 + def test_fileno(self): + # Make sure fd returned by fileno is valid. + with self.urlopen("http://www.python.org/", timeout=None) as open_url: + fd = open_url.fileno() + with os.fdopen(fd, 'rb') as f: + self.assertTrue(f.read(), "reading from file created using fd " + "returned by fileno failed") + + def test_bad_address(self): + # Make sure proper exception is raised when connecting to a bogus + # address. + bogus_domain = "sadflkjsasf.i.nvali.d" + try: + socket.gethostbyname(bogus_domain) + except (OSError, socket.error): # for Py3 and Py2 respectively + # socket.gaierror is too narrow, since getaddrinfo() may also + # fail with EAI_SYSTEM and ETIMEDOUT (seen on Ubuntu 13.04), + # i.e. Python's TimeoutError. + pass + else: + # This happens with some overzealous DNS providers such as OpenDNS + self.skipTest("%r should not resolve for test to work" % bogus_domain) + self.assertRaises(IOError, + # SF patch 809915: In Sep 2003, VeriSign started + # highjacking invalid .com and .net addresses to + # boost traffic to their own site. This test + # started failing then. One hopes the .invalid + # domain will be spared to serve its defined + # purpose. 
+ # urllib.urlopen, "http://www.sadflkjsasadf.com/") + urllib_request.urlopen, + "http://sadflkjsasf.i.nvali.d/") + + +class urlretrieveNetworkTests(unittest.TestCase): + """Tests urllib_request.urlretrieve using the network.""" + + @contextlib.contextmanager + def urlretrieve(self, *args, **kwargs): + resource = args[0] + with support.transient_internet(resource): + file_location, info = urllib_request.urlretrieve(*args, **kwargs) + try: + yield file_location, info + finally: + support.unlink(file_location) + + def test_basic(self): + # Test basic functionality. + with self.urlretrieve("http://www.python.org/") as (file_location, info): + self.assertTrue(os.path.exists(file_location), "file location returned by" + " urlretrieve is not a valid path") + with open(file_location, 'rb') as f: + self.assertTrue(f.read(), "reading from the file location returned" + " by urlretrieve failed") + + def test_specified_path(self): + # Make sure that specifying the location of the file to write to works. + with self.urlretrieve("http://www.python.org/", + support.TESTFN) as (file_location, info): + self.assertEqual(file_location, support.TESTFN) + self.assertTrue(os.path.exists(file_location)) + with open(file_location, 'rb') as f: + self.assertTrue(f.read(), "reading from temporary file failed") + + @unittest.skipIf(utils.PY2, 'test not applicable on Py2') + def test_header(self): + # Make sure header returned as 2nd value from urlretrieve is good. 
+ with self.urlretrieve("http://www.python.org/") as (file_location, info): + self.assertIsInstance(info, email_message.Message, + "info is not an instance of email_message.Message") + + logo = "http://www.python.org/static/community_logos/python-logo-master-v3-TM.png" + + def test_data_header(self): + with self.urlretrieve(self.logo) as (file_location, fileheaders): + datevalue = fileheaders.get('Date') + dateformat = '%a, %d %b %Y %H:%M:%S GMT' + try: + time.strptime(datevalue, dateformat) + except ValueError: + self.fail('Date value not in %r format', dateformat) + + def test_reporthook(self): + records = [] + def recording_reporthook(blocks, block_size, total_size): + records.append((blocks, block_size, total_size)) + + with self.urlretrieve(self.logo, reporthook=recording_reporthook) as ( + file_location, fileheaders): + expected_size = int(fileheaders['Content-Length']) + + records_repr = repr(records) # For use in error messages. + self.assertGreater(len(records), 1, msg="There should always be two " + "calls; the first one before the transfer starts.") + self.assertEqual(records[0][0], 0) + self.assertGreater(records[0][1], 0, + msg="block size can't be 0 in %s" % records_repr) + self.assertEqual(records[0][2], expected_size) + self.assertEqual(records[-1][2], expected_size) + + block_sizes = set(block_size for _, block_size, _ in records) + self.assertEqual(set([records[0][1]]), block_sizes, + msg="block sizes in %s must be equal" % records_repr) + self.assertGreaterEqual(records[-1][0]*records[0][1], expected_size, + msg="number of blocks * block size must be" + " >= total size in %s" % records_repr) + + +def test_main(): + # support.requires('network') + support.run_unittest(URLTimeoutTest, + urlopenNetworkTests, + urlretrieveNetworkTests) + +if __name__ == "__main__": + test_main() diff --git a/tests/test_future/test_urlparse.py b/tests/test_future/test_urlparse.py new file mode 100755 index 00000000..64e8de61 --- /dev/null +++ 
b/tests/test_future/test_urlparse.py @@ -0,0 +1,860 @@ +#! /usr/bin/env python3 +""" +Python 3.3 tests for urllib.parse +""" + +from __future__ import unicode_literals +from __future__ import print_function +from __future__ import division +from __future__ import absolute_import +from future import standard_library + +import future.backports.urllib.parse as urllib_parse +from future.tests.base import unittest + +RFC1808_BASE = "http://a/b/c/d;p?q#f" +RFC2396_BASE = "http://a/b/c/d;p?q" +RFC3986_BASE = 'http://a/b/c/d;p?q' +SIMPLE_BASE = 'http://a/b/c/d' + +# A list of test cases. Each test case is a two-tuple that contains +# a string with the query and a dictionary with the expected result. + +parse_qsl_test_cases = [ + ("", []), + ("&", []), + ("&&", []), + ("=", [('', '')]), + ("=a", [('', 'a')]), + ("a", [('a', '')]), + ("a=", [('a', '')]), + ("a=", [('a', '')]), + ("&a=b", [('a', 'b')]), + ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]), + ("a=1&a=2", [('a', '1'), ('a', '2')]), + (b"", []), + (b"&", []), + (b"&&", []), + (b"=", [(b'', b'')]), + (b"=a", [(b'', b'a')]), + (b"a", [(b'a', b'')]), + (b"a=", [(b'a', b'')]), + (b"a=", [(b'a', b'')]), + (b"&a=b", [(b'a', b'b')]), + (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), + (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]), +] + +class UrlParseTestCase(unittest.TestCase): + + def checkRoundtrips(self, url, parsed, split): + result = urllib_parse.urlparse(url) + self.assertEqual(result, parsed) + t = (result.scheme, result.netloc, result.path, + result.params, result.query, result.fragment) + self.assertEqual(t, parsed) + # put it back together and it should be the same + result2 = urllib_parse.urlunparse(result) + self.assertEqual(result2, url) + self.assertEqual(result2, result.geturl()) + + # the result of geturl() is a fixpoint; we can always parse it + # again to get the same result: + result3 = urllib_parse.urlparse(result.geturl()) + self.assertEqual(result3.geturl(), result.geturl()) + 
self.assertEqual(result3, result) + self.assertEqual(result3.scheme, result.scheme) + self.assertEqual(result3.netloc, result.netloc) + self.assertEqual(result3.path, result.path) + self.assertEqual(result3.params, result.params) + self.assertEqual(result3.query, result.query) + self.assertEqual(result3.fragment, result.fragment) + self.assertEqual(result3.username, result.username) + self.assertEqual(result3.password, result.password) + self.assertEqual(result3.hostname, result.hostname) + self.assertEqual(result3.port, result.port) + + # check the roundtrip using urlsplit() as well + result = urllib_parse.urlsplit(url) + self.assertEqual(result, split) + t = (result.scheme, result.netloc, result.path, + result.query, result.fragment) + self.assertEqual(t, split) + result2 = urllib_parse.urlunsplit(result) + self.assertEqual(result2, url) + self.assertEqual(result2, result.geturl()) + + # check the fixpoint property of re-parsing the result of geturl() + result3 = urllib_parse.urlsplit(result.geturl()) + self.assertEqual(result3.geturl(), result.geturl()) + self.assertEqual(result3, result) + self.assertEqual(result3.scheme, result.scheme) + self.assertEqual(result3.netloc, result.netloc) + self.assertEqual(result3.path, result.path) + self.assertEqual(result3.query, result.query) + self.assertEqual(result3.fragment, result.fragment) + self.assertEqual(result3.username, result.username) + self.assertEqual(result3.password, result.password) + self.assertEqual(result3.hostname, result.hostname) + self.assertEqual(result3.port, result.port) + + def test_qsl(self): + for orig, expect in parse_qsl_test_cases: + result = urllib_parse.parse_qsl(orig, keep_blank_values=True) + self.assertEqual(result, expect, "Error parsing %r" % orig) + expect_without_blanks = [v for v in expect if len(v[1])] + result = urllib_parse.parse_qsl(orig, keep_blank_values=False) + self.assertEqual(result, expect_without_blanks, + "Error parsing %r" % orig) + + def test_roundtrips(self): + 
str_cases = [ + ('file:///tmp/junk.txt', + ('file', '', '/tmp/junk.txt', '', '', ''), + ('file', '', '/tmp/junk.txt', '', '')), + ('imap://mail.python.org/mbox1', + ('imap', 'mail.python.org', '/mbox1', '', '', ''), + ('imap', 'mail.python.org', '/mbox1', '', '')), + ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf', + ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf', + '', '', ''), + ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf', + '', '')), + ('nfs://server/path/to/file.txt', + ('nfs', 'server', '/path/to/file.txt', '', '', ''), + ('nfs', 'server', '/path/to/file.txt', '', '')), + ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/', + ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/', + '', '', ''), + ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/', + '', '')), + ('git+ssh://git@github.com/user/project.git', + ('git+ssh', 'git@github.com','/user/project.git', + '','',''), + ('git+ssh', 'git@github.com','/user/project.git', + '', '')), + ] + def _encode(t): + return (t[0].encode('ascii'), + tuple(x.encode('ascii') for x in t[1]), + tuple(x.encode('ascii') for x in t[2])) + bytes_cases = [_encode(x) for x in str_cases] + for url, parsed, split in str_cases + bytes_cases: + self.checkRoundtrips(url, parsed, split) + + def test_http_roundtrips(self): + # urllib_parse.urlsplit treats 'http:' as an optimized special case, + # so we test both 'http:' and 'https:' in all the following. + # Three cheers for white box knowledge! 
+ str_cases = [ + ('://www.python.org', + ('www.python.org', '', '', '', ''), + ('www.python.org', '', '', '')), + ('://www.python.org#abc', + ('www.python.org', '', '', '', 'abc'), + ('www.python.org', '', '', 'abc')), + ('://www.python.org?q=abc', + ('www.python.org', '', '', 'q=abc', ''), + ('www.python.org', '', 'q=abc', '')), + ('://www.python.org/#abc', + ('www.python.org', '/', '', '', 'abc'), + ('www.python.org', '/', '', 'abc')), + ('://a/b/c/d;p?q#f', + ('a', '/b/c/d', 'p', 'q', 'f'), + ('a', '/b/c/d;p', 'q', 'f')), + ] + def _encode(t): + return (t[0].encode('ascii'), + tuple(x.encode('ascii') for x in t[1]), + tuple(x.encode('ascii') for x in t[2])) + bytes_cases = [_encode(x) for x in str_cases] + str_schemes = ('http', 'https') + bytes_schemes = (b'http', b'https') + str_tests = str_schemes, str_cases + bytes_tests = bytes_schemes, bytes_cases + for schemes, test_cases in (str_tests, bytes_tests): + for scheme in schemes: + for url, parsed, split in test_cases: + url = scheme + url + parsed = (scheme,) + parsed + split = (scheme,) + split + self.checkRoundtrips(url, parsed, split) + + def checkJoin(self, base, relurl, expected): + str_components = (base, relurl, expected) + self.assertEqual(urllib_parse.urljoin(base, relurl), expected) + bytes_components = baseb, relurlb, expectedb = [ + x.encode('ascii') for x in str_components] + self.assertEqual(urllib_parse.urljoin(baseb, relurlb), expectedb) + + def test_unparse_parse(self): + str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',] + bytes_cases = [x.encode('ascii') for x in str_cases] + for u in str_cases + bytes_cases: + self.assertEqual(urllib_parse.urlunsplit(urllib_parse.urlsplit(u)), u) + self.assertEqual(urllib_parse.urlunparse(urllib_parse.urlparse(u)), u) + + def test_RFC1808(self): + # "normal" cases from RFC 1808: + self.checkJoin(RFC1808_BASE, 'g:h', 'g:h') + self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g') + self.checkJoin(RFC1808_BASE, './g', 
'http://a/b/c/g') + self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/') + self.checkJoin(RFC1808_BASE, '/g', 'http://a/g') + self.checkJoin(RFC1808_BASE, '//g', 'http://g') + self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y') + self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x') + self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s') + self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s') + self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x') + self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s') + self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x') + self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s') + self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/') + self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/') + self.checkJoin(RFC1808_BASE, '..', 'http://a/b/') + self.checkJoin(RFC1808_BASE, '../', 'http://a/b/') + self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g') + self.checkJoin(RFC1808_BASE, '../..', 'http://a/') + self.checkJoin(RFC1808_BASE, '../../', 'http://a/') + self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g') + + # "abnormal" cases from RFC 1808: + self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f') + self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g') + self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g') + self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g') + self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g') + self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.') + self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g') + self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..') + self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g') + self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g') + self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/') + self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h') + self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h') + + # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808), + # so we'll not actually 
run these tests (which expect 1808 behavior). + #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g') + #self.checkJoin(RFC1808_BASE, 'http:', 'http:') + + def test_RFC2368(self): + # Issue 11467: path that starts with a number is not parsed correctly + self.assertEqual(urllib_parse.urlparse('mailto:1337@example.org'), + ('mailto', '', '1337@example.org', '', '', '')) + + def test_RFC2396(self): + # cases from RFC 2396 + + + self.checkJoin(RFC2396_BASE, 'g:h', 'g:h') + self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g') + self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g') + self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/') + self.checkJoin(RFC2396_BASE, '/g', 'http://a/g') + self.checkJoin(RFC2396_BASE, '//g', 'http://g') + self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y') + self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s') + self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s') + self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s') + self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x') + self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s') + self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/') + self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/') + self.checkJoin(RFC2396_BASE, '..', 'http://a/b/') + self.checkJoin(RFC2396_BASE, '../', 'http://a/b/') + self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g') + self.checkJoin(RFC2396_BASE, '../..', 'http://a/') + self.checkJoin(RFC2396_BASE, '../../', 'http://a/') + self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g') + self.checkJoin(RFC2396_BASE, '', RFC2396_BASE) + self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g') + self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g') + self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g') + self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g') + self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.') + self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g') + self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..') + 
self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g') + self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g') + self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/') + self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h') + self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h') + self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y') + self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y') + self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x') + self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x') + self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x') + self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x') + + def test_RFC3986(self): + # Test cases from RFC3986 + self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y') + self.checkJoin(RFC2396_BASE, ';x', 'http://a/b/c/;x') + self.checkJoin(RFC3986_BASE, 'g:h','g:h') + self.checkJoin(RFC3986_BASE, 'g','http://a/b/c/g') + self.checkJoin(RFC3986_BASE, './g','http://a/b/c/g') + self.checkJoin(RFC3986_BASE, 'g/','http://a/b/c/g/') + self.checkJoin(RFC3986_BASE, '/g','http://a/g') + self.checkJoin(RFC3986_BASE, '//g','http://g') + self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y') + self.checkJoin(RFC3986_BASE, 'g?y','http://a/b/c/g?y') + self.checkJoin(RFC3986_BASE, '#s','http://a/b/c/d;p?q#s') + self.checkJoin(RFC3986_BASE, 'g#s','http://a/b/c/g#s') + self.checkJoin(RFC3986_BASE, 'g?y#s','http://a/b/c/g?y#s') + self.checkJoin(RFC3986_BASE, ';x','http://a/b/c/;x') + self.checkJoin(RFC3986_BASE, 'g;x','http://a/b/c/g;x') + self.checkJoin(RFC3986_BASE, 'g;x?y#s','http://a/b/c/g;x?y#s') + self.checkJoin(RFC3986_BASE, '','http://a/b/c/d;p?q') + self.checkJoin(RFC3986_BASE, '.','http://a/b/c/') + self.checkJoin(RFC3986_BASE, './','http://a/b/c/') + self.checkJoin(RFC3986_BASE, '..','http://a/b/') + self.checkJoin(RFC3986_BASE, '../','http://a/b/') + self.checkJoin(RFC3986_BASE, '../g','http://a/b/g') + self.checkJoin(RFC3986_BASE, 
'../..','http://a/') + self.checkJoin(RFC3986_BASE, '../../','http://a/') + self.checkJoin(RFC3986_BASE, '../../g','http://a/g') + + #Abnormal Examples + + # The 'abnormal scenarios' are incompatible with RFC3986 parsing + # Tests are here for reference. + + #self.checkJoin(RFC3986_BASE, '../../../g','http://a/g') + #self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g') + #self.checkJoin(RFC3986_BASE, '/./g','http://a/g') + #self.checkJoin(RFC3986_BASE, '/../g','http://a/g') + + self.checkJoin(RFC3986_BASE, 'g.','http://a/b/c/g.') + self.checkJoin(RFC3986_BASE, '.g','http://a/b/c/.g') + self.checkJoin(RFC3986_BASE, 'g..','http://a/b/c/g..') + self.checkJoin(RFC3986_BASE, '..g','http://a/b/c/..g') + self.checkJoin(RFC3986_BASE, './../g','http://a/b/g') + self.checkJoin(RFC3986_BASE, './g/.','http://a/b/c/g/') + self.checkJoin(RFC3986_BASE, 'g/./h','http://a/b/c/g/h') + self.checkJoin(RFC3986_BASE, 'g/../h','http://a/b/c/h') + self.checkJoin(RFC3986_BASE, 'g;x=1/./y','http://a/b/c/g;x=1/y') + self.checkJoin(RFC3986_BASE, 'g;x=1/../y','http://a/b/c/y') + self.checkJoin(RFC3986_BASE, 'g?y/./x','http://a/b/c/g?y/./x') + self.checkJoin(RFC3986_BASE, 'g?y/../x','http://a/b/c/g?y/../x') + self.checkJoin(RFC3986_BASE, 'g#s/./x','http://a/b/c/g#s/./x') + self.checkJoin(RFC3986_BASE, 'g#s/../x','http://a/b/c/g#s/../x') + #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser + self.checkJoin(RFC3986_BASE, 'http:g','http://a/b/c/g') #relaxed parser + + # Test for issue9721 + self.checkJoin('http://a/b/c/de', ';x','http://a/b/c/;x') + + def test_urljoins(self): + self.checkJoin(SIMPLE_BASE, 'g:h','g:h') + self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g') + self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d') + self.checkJoin(SIMPLE_BASE, 'g','http://a/b/c/g') + self.checkJoin(SIMPLE_BASE, './g','http://a/b/c/g') + self.checkJoin(SIMPLE_BASE, 'g/','http://a/b/c/g/') + self.checkJoin(SIMPLE_BASE, '/g','http://a/g') + self.checkJoin(SIMPLE_BASE, 
'//g','http://g') + self.checkJoin(SIMPLE_BASE, '?y','http://a/b/c/d?y') + self.checkJoin(SIMPLE_BASE, 'g?y','http://a/b/c/g?y') + self.checkJoin(SIMPLE_BASE, 'g?y/./x','http://a/b/c/g?y/./x') + self.checkJoin(SIMPLE_BASE, '.','http://a/b/c/') + self.checkJoin(SIMPLE_BASE, './','http://a/b/c/') + self.checkJoin(SIMPLE_BASE, '..','http://a/b/') + self.checkJoin(SIMPLE_BASE, '../','http://a/b/') + self.checkJoin(SIMPLE_BASE, '../g','http://a/b/g') + self.checkJoin(SIMPLE_BASE, '../..','http://a/') + self.checkJoin(SIMPLE_BASE, '../../g','http://a/g') + self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g') + self.checkJoin(SIMPLE_BASE, './../g','http://a/b/g') + self.checkJoin(SIMPLE_BASE, './g/.','http://a/b/c/g/') + self.checkJoin(SIMPLE_BASE, '/./g','http://a/./g') + self.checkJoin(SIMPLE_BASE, 'g/./h','http://a/b/c/g/h') + self.checkJoin(SIMPLE_BASE, 'g/../h','http://a/b/c/h') + self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g') + self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d') + self.checkJoin(SIMPLE_BASE, 'http:?y','http://a/b/c/d?y') + self.checkJoin(SIMPLE_BASE, 'http:g?y','http://a/b/c/g?y') + self.checkJoin(SIMPLE_BASE, 'http:g?y/./x','http://a/b/c/g?y/./x') + self.checkJoin('http:///', '..','http:///') + self.checkJoin('', 'http://a/b/c/g?y/./x','http://a/b/c/g?y/./x') + self.checkJoin('', 'http://a/./g', 'http://a/./g') + self.checkJoin('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2') + self.checkJoin('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2') + + def test_RFC2732(self): + str_cases = [ + ('http://Test.python.org:5432/foo/', 'test.python.org', 5432), + ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432), + ('http://[::1]:5432/foo/', '::1', 5432), + ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432), + ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432), + ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/', + 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432), + 
('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432), + ('http://[::ffff:12.34.56.78]:5432/foo/', + '::ffff:12.34.56.78', 5432), + ('http://Test.python.org/foo/', 'test.python.org', None), + ('http://12.34.56.78/foo/', '12.34.56.78', None), + ('http://[::1]/foo/', '::1', None), + ('http://[dead:beef::1]/foo/', 'dead:beef::1', None), + ('http://[dead:beef::]/foo/', 'dead:beef::', None), + ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/', + 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None), + ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None), + ('http://[::ffff:12.34.56.78]/foo/', + '::ffff:12.34.56.78', None), + ] + def _encode(t): + return t[0].encode('ascii'), t[1].encode('ascii'), t[2] + bytes_cases = [_encode(x) for x in str_cases] + for url, hostname, port in str_cases + bytes_cases: + urlparsed = urllib_parse.urlparse(url) + self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port)) + + str_cases = [ + 'http://::12.34.56.78]/', + 'http://[::1/foo/', + 'ftp://[::1/foo/bad]/bad', + 'http://[::1/foo/bad]/bad', + 'http://[::ffff:12.34.56.78'] + bytes_cases = [x.encode('ascii') for x in str_cases] + for invalid_url in str_cases + bytes_cases: + self.assertRaises(ValueError, urllib_parse.urlparse, invalid_url) + + def test_urldefrag(self): + str_cases = [ + ('http://python.org#frag', 'http://python.org', 'frag'), + ('http://python.org', 'http://python.org', ''), + ('http://python.org/#frag', 'http://python.org/', 'frag'), + ('http://python.org/', 'http://python.org/', ''), + ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'), + ('http://python.org/?q', 'http://python.org/?q', ''), + ('http://python.org/p#frag', 'http://python.org/p', 'frag'), + ('http://python.org/p?q', 'http://python.org/p?q', ''), + (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'), + (RFC2396_BASE, 'http://a/b/c/d;p?q', ''), + ] + def _encode(t): + return type(t)(x.encode('ascii') for x in t) + bytes_cases = [_encode(x) for x in str_cases] + for url, defrag, 
frag in str_cases + bytes_cases: + result = urllib_parse.urldefrag(url) + self.assertEqual(result.geturl(), url) + self.assertEqual(result, (defrag, frag)) + self.assertEqual(result.url, defrag) + self.assertEqual(result.fragment, frag) + + def test_urlsplit_attributes(self): + url = "HTTP://WWW.PYTHON.ORG/doc/#frag" + p = urllib_parse.urlsplit(url) + self.assertEqual(p.scheme, "http") + self.assertEqual(p.netloc, "WWW.PYTHON.ORG") + self.assertEqual(p.path, "/doc/") + self.assertEqual(p.query, "") + self.assertEqual(p.fragment, "frag") + self.assertEqual(p.username, None) + self.assertEqual(p.password, None) + self.assertEqual(p.hostname, "www.python.org") + self.assertEqual(p.port, None) + # geturl() won't return exactly the original URL in this case + # since the scheme is always case-normalized + # We handle this by ignoring the first 4 characters of the URL + self.assertEqual(p.geturl()[4:], url[4:]) + + url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag" + p = urllib_parse.urlsplit(url) + self.assertEqual(p.scheme, "http") + self.assertEqual(p.netloc, "User:Pass@www.python.org:080") + self.assertEqual(p.path, "/doc/") + self.assertEqual(p.query, "query=yes") + self.assertEqual(p.fragment, "frag") + self.assertEqual(p.username, "User") + self.assertEqual(p.password, "Pass") + self.assertEqual(p.hostname, "www.python.org") + self.assertEqual(p.port, 80) + self.assertEqual(p.geturl(), url) + + # Addressing issue1698, which suggests Username can contain + # "@" characters. Though not RFC compliant, many ftp sites allow + # and request email addresses as usernames. 
+ + url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag" + p = urllib_parse.urlsplit(url) + self.assertEqual(p.scheme, "http") + self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080") + self.assertEqual(p.path, "/doc/") + self.assertEqual(p.query, "query=yes") + self.assertEqual(p.fragment, "frag") + self.assertEqual(p.username, "User@example.com") + self.assertEqual(p.password, "Pass") + self.assertEqual(p.hostname, "www.python.org") + self.assertEqual(p.port, 80) + self.assertEqual(p.geturl(), url) + + # And check them all again, only with bytes this time + url = b"HTTP://WWW.PYTHON.ORG/doc/#frag" + p = urllib_parse.urlsplit(url) + self.assertEqual(p.scheme, b"http") + self.assertEqual(p.netloc, b"WWW.PYTHON.ORG") + self.assertEqual(p.path, b"/doc/") + self.assertEqual(p.query, b"") + self.assertEqual(p.fragment, b"frag") + self.assertEqual(p.username, None) + self.assertEqual(p.password, None) + self.assertEqual(p.hostname, b"www.python.org") + self.assertEqual(p.port, None) + self.assertEqual(p.geturl()[4:], url[4:]) + + url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag" + p = urllib_parse.urlsplit(url) + self.assertEqual(p.scheme, b"http") + self.assertEqual(p.netloc, b"User:Pass@www.python.org:080") + self.assertEqual(p.path, b"/doc/") + self.assertEqual(p.query, b"query=yes") + self.assertEqual(p.fragment, b"frag") + self.assertEqual(p.username, b"User") + self.assertEqual(p.password, b"Pass") + self.assertEqual(p.hostname, b"www.python.org") + self.assertEqual(p.port, 80) + self.assertEqual(p.geturl(), url) + + url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag" + p = urllib_parse.urlsplit(url) + self.assertEqual(p.scheme, b"http") + self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080") + self.assertEqual(p.path, b"/doc/") + self.assertEqual(p.query, b"query=yes") + self.assertEqual(p.fragment, b"frag") + self.assertEqual(p.username, b"User@example.com") + 
self.assertEqual(p.password, b"Pass") + self.assertEqual(p.hostname, b"www.python.org") + self.assertEqual(p.port, 80) + self.assertEqual(p.geturl(), url) + + # Verify an illegal port is returned as None + url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag" + p = urllib_parse.urlsplit(url) + self.assertEqual(p.port, None) + + def test_attributes_bad_port(self): + """Check handling of non-integer ports.""" + p = urllib_parse.urlsplit("http://www.example.net:foo") + self.assertEqual(p.netloc, "www.example.net:foo") + self.assertRaises(ValueError, lambda: p.port) + + p = urllib_parse.urlparse("http://www.example.net:foo") + self.assertEqual(p.netloc, "www.example.net:foo") + self.assertRaises(ValueError, lambda: p.port) + + # Once again, repeat ourselves to test bytes + p = urllib_parse.urlsplit(b"http://www.example.net:foo") + self.assertEqual(p.netloc, b"www.example.net:foo") + self.assertRaises(ValueError, lambda: p.port) + + p = urllib_parse.urlparse(b"http://www.example.net:foo") + self.assertEqual(p.netloc, b"www.example.net:foo") + self.assertRaises(ValueError, lambda: p.port) + + def test_attributes_without_netloc(self): + # This example is straight from RFC 3261. It looks like it + # should allow the username, hostname, and port to be filled + # in, but doesn't. Since it's a URI and doesn't use the + # scheme://netloc syntax, the netloc and related attributes + # should be left empty. 
+ uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15" + p = urllib_parse.urlsplit(uri) + self.assertEqual(p.netloc, "") + self.assertEqual(p.username, None) + self.assertEqual(p.password, None) + self.assertEqual(p.hostname, None) + self.assertEqual(p.port, None) + self.assertEqual(p.geturl(), uri) + + p = urllib_parse.urlparse(uri) + self.assertEqual(p.netloc, "") + self.assertEqual(p.username, None) + self.assertEqual(p.password, None) + self.assertEqual(p.hostname, None) + self.assertEqual(p.port, None) + self.assertEqual(p.geturl(), uri) + + # You guessed it, repeating the test with bytes input + uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15" + p = urllib_parse.urlsplit(uri) + self.assertEqual(p.netloc, b"") + self.assertEqual(p.username, None) + self.assertEqual(p.password, None) + self.assertEqual(p.hostname, None) + self.assertEqual(p.port, None) + self.assertEqual(p.geturl(), uri) + + p = urllib_parse.urlparse(uri) + self.assertEqual(p.netloc, b"") + self.assertEqual(p.username, None) + self.assertEqual(p.password, None) + self.assertEqual(p.hostname, None) + self.assertEqual(p.port, None) + self.assertEqual(p.geturl(), uri) + + def test_noslash(self): + # Issue 1637: http://foo.com?query is legal + self.assertEqual(urllib_parse.urlparse("http://example.com?blahblah=/foo"), + ('http', 'example.com', '', '', 'blahblah=/foo', '')) + self.assertEqual(urllib_parse.urlparse(b"http://example.com?blahblah=/foo"), + (b'http', b'example.com', b'', b'', b'blahblah=/foo', b'')) + + def test_withoutscheme(self): + # Test urlparse without scheme + # Issue 754016: urlparse goes wrong with IP:port without scheme + # RFC 1808 specifies that netloc should start with //, urlparse expects + # the same, otherwise it classifies the portion of url as path. 
+ self.assertEqual(urllib_parse.urlparse("path"), + ('','','path','','','')) + self.assertEqual(urllib_parse.urlparse("//www.python.org:80"), + ('','www.python.org:80','','','','')) + self.assertEqual(urllib_parse.urlparse("http://www.python.org:80"), + ('http','www.python.org:80','','','','')) + # Repeat for bytes input + self.assertEqual(urllib_parse.urlparse(b"path"), + (b'',b'',b'path',b'',b'',b'')) + self.assertEqual(urllib_parse.urlparse(b"//www.python.org:80"), + (b'',b'www.python.org:80',b'',b'',b'',b'')) + self.assertEqual(urllib_parse.urlparse(b"http://www.python.org:80"), + (b'http',b'www.python.org:80',b'',b'',b'',b'')) + + def test_portseparator(self): + # Issue 754016 makes changes for port separator ':' from scheme separator + self.assertEqual(urllib_parse.urlparse("path:80"), + ('','','path:80','','','')) + self.assertEqual(urllib_parse.urlparse("http:"),('http','','','','','')) + self.assertEqual(urllib_parse.urlparse("https:"),('https','','','','','')) + self.assertEqual(urllib_parse.urlparse("http://www.python.org:80"), + ('http','www.python.org:80','','','','')) + # As usual, need to check bytes input as well + self.assertEqual(urllib_parse.urlparse(b"path:80"), + (b'',b'',b'path:80',b'',b'',b'')) + self.assertEqual(urllib_parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b'')) + self.assertEqual(urllib_parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b'')) + self.assertEqual(urllib_parse.urlparse(b"http://www.python.org:80"), + (b'http',b'www.python.org:80',b'',b'',b'',b'')) + + def test_usingsys(self): + # Issue 3314: sys module is used in the error + self.assertRaises(TypeError, urllib_parse.urlencode, "foo") + + def test_anyscheme(self): + # Issue 7904: s3://foo.com/stuff has netloc "foo.com". 
+ self.assertEqual(urllib_parse.urlparse("s3://foo.com/stuff"), + ('s3', 'foo.com', '/stuff', '', '', '')) + self.assertEqual(urllib_parse.urlparse("x-newscheme://foo.com/stuff"), + ('x-newscheme', 'foo.com', '/stuff', '', '', '')) + self.assertEqual(urllib_parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"), + ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment')) + self.assertEqual(urllib_parse.urlparse("x-newscheme://foo.com/stuff?query"), + ('x-newscheme', 'foo.com', '/stuff', '', 'query', '')) + + # And for bytes... + self.assertEqual(urllib_parse.urlparse(b"s3://foo.com/stuff"), + (b's3', b'foo.com', b'/stuff', b'', b'', b'')) + self.assertEqual(urllib_parse.urlparse(b"x-newscheme://foo.com/stuff"), + (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b'')) + self.assertEqual(urllib_parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"), + (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment')) + self.assertEqual(urllib_parse.urlparse(b"x-newscheme://foo.com/stuff?query"), + (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'')) + + def test_mixed_types_rejected(self): + # Several functions that process either strings or ASCII encoded bytes + # accept multiple arguments. 
Check they reject mixed type input + with self.assertRaisesRegex(TypeError, "Cannot mix str"): + urllib_parse.urlparse("www.python.org", b"http") + with self.assertRaisesRegex(TypeError, "Cannot mix str"): + urllib_parse.urlparse(b"www.python.org", "http") + with self.assertRaisesRegex(TypeError, "Cannot mix str"): + urllib_parse.urlsplit("www.python.org", b"http") + with self.assertRaisesRegex(TypeError, "Cannot mix str"): + urllib_parse.urlsplit(b"www.python.org", "http") + with self.assertRaisesRegex(TypeError, "Cannot mix str"): + urllib_parse.urlunparse(( b"http", "www.python.org","","","","")) + with self.assertRaisesRegex(TypeError, "Cannot mix str"): + urllib_parse.urlunparse(("http", b"www.python.org","","","","")) + with self.assertRaisesRegex(TypeError, "Cannot mix str"): + urllib_parse.urlunsplit((b"http", "www.python.org","","","")) + with self.assertRaisesRegex(TypeError, "Cannot mix str"): + urllib_parse.urlunsplit(("http", b"www.python.org","","","")) + with self.assertRaisesRegex(TypeError, "Cannot mix str"): + urllib_parse.urljoin("http://python.org", b"http://python.org") + with self.assertRaisesRegex(TypeError, "Cannot mix str"): + urllib_parse.urljoin(b"http://python.org", "http://python.org") + + def _check_result_type(self, str_type): + num_args = len(str_type._fields) + bytes_type = str_type._encoded_counterpart + self.assertIs(bytes_type._decoded_counterpart, str_type) + str_args = ('',) * num_args + bytes_args = (b'',) * num_args + str_result = str_type(*str_args) + bytes_result = bytes_type(*bytes_args) + encoding = 'ascii' + errors = 'strict' + self.assertEqual(str_result, str_args) + self.assertEqual(bytes_result.decode(), str_args) + self.assertEqual(bytes_result.decode(), str_result) + self.assertEqual(bytes_result.decode(encoding), str_args) + self.assertEqual(bytes_result.decode(encoding), str_result) + self.assertEqual(bytes_result.decode(encoding, errors), str_args) + self.assertEqual(bytes_result.decode(encoding, errors), 
str_result) + self.assertEqual(bytes_result, bytes_args) + self.assertEqual(str_result.encode(), bytes_args) + self.assertEqual(str_result.encode(), bytes_result) + self.assertEqual(str_result.encode(encoding), bytes_args) + self.assertEqual(str_result.encode(encoding), bytes_result) + self.assertEqual(str_result.encode(encoding, errors), bytes_args) + self.assertEqual(str_result.encode(encoding, errors), bytes_result) + + def test_result_pairs(self): + # Check encoding and decoding between result pairs + result_types = [ + urllib_parse.DefragResult, + urllib_parse.SplitResult, + urllib_parse.ParseResult, + ] + for result_type in result_types: + self._check_result_type(result_type) + + def test_parse_qs_encoding(self): + result = urllib_parse.parse_qs("key=\u0141%E9", encoding="latin-1") + self.assertEqual(result, {'key': ['\u0141\xE9']}) + result = urllib_parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8") + self.assertEqual(result, {'key': ['\u0141\xE9']}) + result = urllib_parse.parse_qs("key=\u0141%C3%A9", encoding="ascii") + self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']}) + result = urllib_parse.parse_qs("key=\u0141%E9-", encoding="ascii") + self.assertEqual(result, {'key': ['\u0141\ufffd-']}) + result = urllib_parse.parse_qs("key=\u0141%E9-", encoding="ascii", + errors="ignore") + self.assertEqual(result, {'key': ['\u0141-']}) + + def test_parse_qsl_encoding(self): + result = urllib_parse.parse_qsl("key=\u0141%E9", encoding="latin-1") + self.assertEqual(result, [('key', '\u0141\xE9')]) + result = urllib_parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8") + self.assertEqual(result, [('key', '\u0141\xE9')]) + result = urllib_parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii") + self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')]) + result = urllib_parse.parse_qsl("key=\u0141%E9-", encoding="ascii") + self.assertEqual(result, [('key', '\u0141\ufffd-')]) + result = urllib_parse.parse_qsl("key=\u0141%E9-", encoding="ascii", + errors="ignore") + 
self.assertEqual(result, [('key', '\u0141-')]) + + def test_splitnport(self): + # Normal cases are exercised by other tests; ensure that we also + # catch cases with no port specified. (testcase ensuring coverage) + result = urllib_parse.splitnport('parrot:88') + self.assertEqual(result, ('parrot', 88)) + result = urllib_parse.splitnport('parrot') + self.assertEqual(result, ('parrot', -1)) + result = urllib_parse.splitnport('parrot', 55) + self.assertEqual(result, ('parrot', 55)) + result = urllib_parse.splitnport('parrot:') + self.assertEqual(result, ('parrot', None)) + + def test_splitquery(self): + # Normal cases are exercised by other tests; ensure that we also + # catch cases with no port specified (testcase ensuring coverage) + result = urllib_parse.splitquery('http://python.org/fake?foo=bar') + self.assertEqual(result, ('http://python.org/fake', 'foo=bar')) + result = urllib_parse.splitquery('http://python.org/fake?foo=bar?') + self.assertEqual(result, ('http://python.org/fake?foo=bar', '')) + result = urllib_parse.splitquery('http://python.org/fake') + self.assertEqual(result, ('http://python.org/fake', None)) + + def test_splitvalue(self): + # Normal cases are exercised by other tests; test pathological cases + # with no key/value pairs. (testcase ensuring coverage) + result = urllib_parse.splitvalue('foo=bar') + self.assertEqual(result, ('foo', 'bar')) + result = urllib_parse.splitvalue('foo=') + self.assertEqual(result, ('foo', '')) + result = urllib_parse.splitvalue('foobar') + self.assertEqual(result, ('foobar', None)) + + def test_to_bytes(self): + result = urllib_parse.to_bytes('http://www.python.org') + self.assertEqual(result, 'http://www.python.org') + self.assertRaises(UnicodeError, urllib_parse.to_bytes, + 'http://www.python.org/medi\u00e6val') + + def test_urlencode_sequences(self): + # Other tests incidentally urlencode things; test non-covered cases: + # Sequence and object values. 
+ result = urllib_parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True) + # we cannot rely on ordering here + assert set(result.split('&')) == set(['a=1', 'a=2', 'b=3', 'b=4', 'b=5']) + + class Trivial(object): + def __str__(self): + return 'trivial' + + result = urllib_parse.urlencode({'a': Trivial()}, True) + self.assertEqual(result, 'a=trivial') + + def test_quote_from_bytes(self): + self.assertRaises(TypeError, urllib_parse.quote_from_bytes, 'foo') + result = urllib_parse.quote_from_bytes(b'archaeological arcana') + self.assertEqual(result, 'archaeological%20arcana') + result = urllib_parse.quote_from_bytes(b'') + self.assertEqual(result, '') + + def test_unquote_to_bytes(self): + result = urllib_parse.unquote_to_bytes('abc%20def') + self.assertEqual(result, b'abc def') + result = urllib_parse.unquote_to_bytes('') + self.assertEqual(result, b'') + + def test_quote_errors(self): + self.assertRaises(TypeError, urllib_parse.quote, b'foo', + encoding='utf-8') + self.assertRaises(TypeError, urllib_parse.quote, b'foo', errors='strict') + + def test_issue14072(self): + p1 = urllib_parse.urlsplit('tel:+31-641044153') + self.assertEqual(p1.scheme, 'tel') + self.assertEqual(p1.path, '+31-641044153') + p2 = urllib_parse.urlsplit('tel:+31641044153') + self.assertEqual(p2.scheme, 'tel') + self.assertEqual(p2.path, '+31641044153') + # assert the behavior for urlparse + p1 = urllib_parse.urlparse('tel:+31-641044153') + self.assertEqual(p1.scheme, 'tel') + self.assertEqual(p1.path, '+31-641044153') + p2 = urllib_parse.urlparse('tel:+31641044153') + self.assertEqual(p2.scheme, 'tel') + self.assertEqual(p2.path, '+31641044153') + + def test_telurl_params(self): + p1 = urllib_parse.urlparse('tel:123-4;phone-context=+1-650-516') + self.assertEqual(p1.scheme, 'tel') + self.assertEqual(p1.path, '123-4') + self.assertEqual(p1.params, 'phone-context=+1-650-516') + + p1 = urllib_parse.urlparse('tel:+1-201-555-0123') + self.assertEqual(p1.scheme, 'tel') + self.assertEqual(p1.path, 
'+1-201-555-0123') + self.assertEqual(p1.params, '') + + p1 = urllib_parse.urlparse('tel:7042;phone-context=example.com') + self.assertEqual(p1.scheme, 'tel') + self.assertEqual(p1.path, '7042') + self.assertEqual(p1.params, 'phone-context=example.com') + + p1 = urllib_parse.urlparse('tel:863-1234;phone-context=+1-914-555') + self.assertEqual(p1.scheme, 'tel') + self.assertEqual(p1.path, '863-1234') + self.assertEqual(p1.params, 'phone-context=+1-914-555') + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_future/test_utils.py b/tests/test_future/test_utils.py new file mode 100644 index 00000000..a496bcaf --- /dev/null +++ b/tests/test_future/test_utils.py @@ -0,0 +1,406 @@ +# -*- coding: utf-8 -*- +""" +Tests for the various utility functions and classes in ``future.utils`` +""" + +from __future__ import absolute_import, unicode_literals, print_function +import re, sys, traceback +from future.builtins import * +from future.utils import (old_div, istext, isbytes, native, PY2, PY3, + native_str, raise_, as_native_str, ensure_new_type, + bytes_to_native_str, raise_from) +from future.tests.base import expectedFailurePY3 + +from numbers import Integral +from future.tests.base import unittest, skip26 + + +TEST_UNICODE_STR = u'ℝεα∂@ßʟ℮ ☂ℯṧт υηḯ¢☺ḓ℮' + + +class MyExceptionIssue235(Exception): + def __init__(self, a, b): + super(MyExceptionIssue235, self).__init__('{0}: {1}'.format(a, b)) + + +class TestUtils(unittest.TestCase): + def setUp(self): + self.s = TEST_UNICODE_STR + self.s2 = str(self.s) + self.b = b'ABCDEFG' + self.b2 = bytes(self.b) + + def test_old_div(self): + """ + Tests whether old_div(a, b) is always equal to Python 2's a / b. 
+ """ + self.assertEqual(old_div(1, 2), 0) + self.assertEqual(old_div(2, 2), 1) + self.assertTrue(isinstance(old_div(2, 2), int)) + + self.assertEqual(old_div(3, 2), 1) + self.assertTrue(isinstance(old_div(3, 2), int)) + + self.assertEqual(old_div(3., 2), 1.5) + self.assertTrue(not isinstance(old_div(3., 2), int)) + + self.assertEqual(old_div(-1, 2.), -0.5) + self.assertTrue(not isinstance(old_div(-1, 2.), int)) + + with self.assertRaises(ZeroDivisionError): + old_div(0, 0) + with self.assertRaises(ZeroDivisionError): + old_div(1, 0) + + def test_native_str(self): + """ + Tests whether native_str is really equal to the platform str. + """ + if PY2: + import __builtin__ + builtin_str = __builtin__.str + else: + import builtins + builtin_str = builtins.str + + inputs = [b'blah', u'blah', 'blah'] + for s in inputs: + self.assertEqual(native_str(s), builtin_str(s)) + self.assertTrue(isinstance(native_str(s), builtin_str)) + + def test_native(self): + a = int(10**20) # long int + b = native(a) + self.assertEqual(a, b) + if PY2: + self.assertEqual(type(b), long) + else: + self.assertEqual(type(b), int) + + c = bytes(b'ABC') + d = native(c) + self.assertEqual(c, d) + if PY2: + self.assertEqual(type(d), type(b'Py2 byte-string')) + else: + self.assertEqual(type(d), bytes) + + s = str(u'ABC') + t = native(s) + self.assertEqual(s, t) + if PY2: + self.assertEqual(type(t), unicode) + else: + self.assertEqual(type(t), str) + + d1 = dict({'a': 1, 'b': 2}) + d2 = native(d1) + self.assertEqual(d1, d2) + self.assertEqual(type(d2), type({})) + + def test_istext(self): + self.assertTrue(istext(self.s)) + self.assertTrue(istext(self.s2)) + self.assertFalse(istext(self.b)) + self.assertFalse(istext(self.b2)) + + def test_isbytes(self): + self.assertTrue(isbytes(self.b)) + self.assertTrue(isbytes(self.b2)) + self.assertFalse(isbytes(self.s)) + self.assertFalse(isbytes(self.s2)) + + def test_raise_(self): + def valuerror(): + try: + raise ValueError("Apples!") + except Exception as e: + 
raise_(e) + + self.assertRaises(ValueError, valuerror) + + def with_value(): + raise_(IOError, "This is an error") + + self.assertRaises(IOError, with_value) + + try: + with_value() + except IOError as e: + self.assertEqual(str(e), "This is an error") + + def with_traceback(): + try: + raise ValueError("An error") + except Exception as e: + _, _, traceback = sys.exc_info() + raise_(IOError, str(e), traceback) + + self.assertRaises(IOError, with_traceback) + + try: + with_traceback() + except IOError as e: + self.assertEqual(str(e), "An error") + + class Timeout(BaseException): + pass + + self.assertRaises(Timeout, raise_, Timeout) + self.assertRaises(Timeout, raise_, Timeout()) + + if PY3: + self.assertRaisesRegex( + TypeError, "class must derive from BaseException", + raise_, int) + + def test_raise_from_None(self): + try: + try: + raise TypeError("foo") + except: + raise_from(ValueError(), None) + except ValueError as e: + self.assertTrue(isinstance(e.__context__, TypeError)) + self.assertIsNone(e.__cause__) + + def test_issue_235(self): + def foo(): + raise MyExceptionIssue235(3, 7) + + def bar(): + try: + foo() + except Exception as err: + raise_from(ValueError('blue'), err) + + try: + bar() + except ValueError as e: + pass + # incorrectly raises a TypeError on Py3 as of v0.15.2. + + def test_raise_custom_exception(self): + """ + Test issue #387. 
+ """ + class CustomException(Exception): + def __init__(self, severity, message): + super().__init__("custom message of severity %d: %s" % ( + severity, message)) + + def raise_custom_exception(): + try: + raise CustomException(1, "hello") + except CustomException: + raise_(*sys.exc_info()) + + self.assertRaises(CustomException, raise_custom_exception) + + @skip26 + def test_as_native_str(self): + """ + Tests the decorator as_native_str() + """ + class MyClass(object): + @as_native_str() + def __repr__(self): + return u'abc' + + obj = MyClass() + + self.assertEqual(repr(obj), 'abc') + if PY2: + self.assertEqual(repr(obj), b'abc') + else: + self.assertEqual(repr(obj), u'abc') + + def test_ensure_new_type(self): + s = u'abcd' + s2 = str(s) + self.assertEqual(ensure_new_type(s), s2) + self.assertEqual(type(ensure_new_type(s)), str) + + b = b'xyz' + b2 = bytes(b) + self.assertEqual(ensure_new_type(b), b2) + self.assertEqual(type(ensure_new_type(b)), bytes) + + i = 10000000000000 + i2 = int(i) + self.assertEqual(ensure_new_type(i), i2) + self.assertEqual(type(ensure_new_type(i)), int) + + l = [] + self.assertIs(ensure_new_type(l), l) + + def test_bytes_to_native_str(self): + """ + Test for issue #47 + """ + b = bytes(b'abc') + s = bytes_to_native_str(b) + if PY2: + self.assertEqual(s, b) + else: + self.assertEqual(s, 'abc') + self.assertTrue(isinstance(s, native_str)) + self.assertEqual(type(s), native_str) + + +class TestCause(unittest.TestCase): + """ + Except for the first method, these were adapted from Py3.3's + Lib/test/test_raise.py. 
+ """ + def test_normal_use(self): + """ + Adapted from PEP 3134 docs + """ + # Setup: + class DatabaseError(Exception): + pass + + # Python 2 and 3: + from future.utils import raise_from + + class FileDatabase: + def __init__(self, filename): + try: + self.file = open(filename) + except IOError as exc: + raise_from(DatabaseError('failed to open'), exc) + + # Testing the above: + try: + fd = FileDatabase('non_existent_file.txt') + except Exception as e: + assert isinstance(e.__cause__, IOError) # FileNotFoundError on + # Py3.3+ inherits from IOError + + def testCauseSyntax(self): + try: + try: + try: + raise TypeError + except Exception: + raise_from(ValueError, None) + except ValueError as exc: + self.assertIsNone(exc.__cause__) + self.assertTrue(exc.__suppress_context__) + exc.__suppress_context__ = False + raise exc + except ValueError as exc: + e = exc + + self.assertIsNone(e.__cause__) + self.assertFalse(e.__suppress_context__) + self.assertIsInstance(e.__context__, TypeError) + + def test_invalid_cause(self): + try: + raise_from(IndexError, 5) + except TypeError as e: + self.assertIn("exception cause", str(e)) + else: + self.fail("No exception raised") + + def test_class_cause(self): + try: + raise_from(IndexError, KeyError) + except IndexError as e: + self.assertIsInstance(e.__cause__, KeyError) + else: + self.fail("No exception raised") + + def test_instance_cause(self): + cause = KeyError('blah') + try: + raise_from(IndexError, cause) + except IndexError as e: + # FAILS: + self.assertTrue(e.__cause__ is cause) + # Even this weaker version seems to fail, although repr(cause) looks correct. + # Is there something strange about testing exceptions for equality? 
+ self.assertEqual(e.__cause__, cause) + else: + self.fail("No exception raised") + + def test_erroneous_cause(self): + class MyException(Exception): + def __init__(self): + raise RuntimeError() + + try: + raise_from(IndexError, MyException) + except RuntimeError: + pass + else: + self.fail("No exception raised") + + def test_single_exception_stacktrace(self): + expected = '''Traceback (most recent call last): + File "/opt/python-future/tests/test_future/test_utils.py", line 328, in test_single_exception_stacktrace + raise CustomException('ERROR') +''' + if PY2: + expected += 'CustomException: ERROR\n' + else: + expected += 'test_future.test_utils.CustomException: ERROR\n' + + try: + raise CustomException('ERROR') + except: + ret = re.sub(r'"[^"]*tests/test_future', '"/opt/python-future/tests/test_future', traceback.format_exc()) + ret = re.sub(r', line \d+,', ', line 328,', ret) + self.assertEqual(expected, ret) + else: + self.fail('No exception raised') + + if PY2: + def test_chained_exceptions_stacktrace(self): + expected = '''Traceback (most recent call last): + File "/opt/python-future/tests/test_future/test_utils.py", line 1, in test_chained_exceptions_stacktrace + raise_from(CustomException('ERROR'), val_err) + File "/opt/python-future/src/future/utils/__init__.py", line 1, in raise_from + raise e +CustomException: ERROR + +The above exception was the direct cause of the following exception: + + File "/opt/python-future/tests/test_future/test_utils.py", line 1, in test_chained_exceptions_stacktrace + raise ValueError('Wooops') +ValueError: Wooops +''' + + try: + try: + raise ValueError('Wooops') + except ValueError as val_err: + raise_from(CustomException('ERROR'), val_err) + except Exception as err: + ret = re.sub(r'"[^"]*tests/test_future', '"/opt/python-future/tests/test_future', traceback.format_exc()) + ret = re.sub(r'"[^"]*future/utils/__init__.py', '"/opt/python-future/src/future/utils/__init__.py', ret) + ret = re.sub(r', line \d+,', ', line 1,', 
ret) + self.assertEqual(expected.splitlines(), ret.splitlines()) + else: + self.fail('No exception raised') + + +class CustomException(Exception): + if PY2: + def __str__(self): + try: + out = Exception.__str__(self) + if hasattr(self, '__cause__') and self.__cause__ and hasattr(self.__cause__, '__traceback__') and self.__cause__.__traceback__: + out += '\n\nThe above exception was the direct cause of the following exception:\n\n' + out += ''.join(traceback.format_tb(self.__cause__.__traceback__) + ['{0}: {1}'.format(self.__cause__.__class__.__name__, self.__cause__)]) + return out + except Exception as e: + print(e) + else: + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_past/__init__.py b/tests/test_past/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_past/test_basestring.py b/tests/test_past/test_basestring.py new file mode 100644 index 00000000..6c224b3e --- /dev/null +++ b/tests/test_past/test_basestring.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +""" +Tests for the Py2-like class:`basestring` type. 
+""" + +from __future__ import absolute_import, unicode_literals, print_function +import os + +from past import utils +from future.tests.base import unittest +from past.builtins import basestring, str as oldstr + + +class TestBaseString(unittest.TestCase): + + def test_isinstance(self): + s = b'abc' + self.assertTrue(isinstance(s, basestring)) + s2 = oldstr(b'abc') + self.assertTrue(isinstance(s2, basestring)) + + def test_issubclass(self): + self.assertTrue(issubclass(str, basestring)) + self.assertTrue(issubclass(bytes, basestring)) + self.assertTrue(issubclass(basestring, basestring)) + self.assertFalse(issubclass(int, basestring)) + self.assertFalse(issubclass(list, basestring)) + self.assertTrue(issubclass(basestring, object)) + + class CustomString(basestring): + pass + class NotString(object): + pass + class OldStyleClass: + pass + self.assertTrue(issubclass(CustomString, basestring)) + self.assertFalse(issubclass(NotString, basestring)) + self.assertFalse(issubclass(OldStyleClass, basestring)) + + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_past/test_builtins.py b/tests/test_past/test_builtins.py new file mode 100644 index 00000000..98d3c8c1 --- /dev/null +++ b/tests/test_past/test_builtins.py @@ -0,0 +1,1789 @@ +from __future__ import division +from __future__ import print_function +# Python test set -- built-in functions +from past.builtins import filter, map, range, zip +from past.builtins import basestring, dict, str, long #, unicode +from past.builtins import apply, cmp, execfile, intern, raw_input +from past.builtins import reduce, reload, unichr, unicode, xrange + +from future.backports.test.support import TESTFN #, run_unittest +import tempfile +import os +TESTFN = tempfile.mkdtemp() + os.path.sep + TESTFN + +import platform +import warnings +import sys +import io +import random +# import UserDict +from os import unlink +from operator import neg +from future.tests.base import unittest, expectedFailurePY3, skip26 + +# 
count the number of test runs. +# used to skip running test_execfile() multiple times +# and to create unique strings to intern in test_intern() +numruns = 0 + +def fcmp(x, y): # fuzzy comparison function + """ + From Python 2.7 test.test_support + """ + if isinstance(x, float) or isinstance(y, float): + try: + fuzz = (abs(x) + abs(y)) * FUZZ + if abs(x-y) <= fuzz: + return 0 + except: + pass + elif type(x) == type(y) and isinstance(x, (tuple, list)): + for i in range(min(len(x), len(y))): + outcome = fcmp(x[i], y[i]) + if outcome != 0: + return outcome + return (len(x) > len(y)) - (len(x) < len(y)) + return (x > y) - (x < y) + + +class Squares: + + def __init__(self, max): + self.max = max + self.sofar = [] + + def __len__(self): return len(self.sofar) + + def __getitem__(self, i): + if not 0 <= i < self.max: raise IndexError + n = len(self.sofar) + while n <= i: + self.sofar.append(n*n) + n += 1 + return self.sofar[i] + +class StrSquares: + + def __init__(self, max): + self.max = max + self.sofar = [] + + def __len__(self): + return len(self.sofar) + + def __getitem__(self, i): + if not 0 <= i < self.max: + raise IndexError + n = len(self.sofar) + while n <= i: + self.sofar.append(str(n*n)) + n += 1 + return self.sofar[i] + +class BitBucket: + def write(self, line): + pass + + +class TestFailingBool: + def __nonzero__(self): + raise RuntimeError + +class TestFailingIter: + def __iter__(self): + raise RuntimeError + +class BuiltinTest(unittest.TestCase): + + def test_import(self): + __import__('sys') + __import__('time') + __import__('string') + __import__(name='sys') + __import__(name='time', level=0) + self.assertRaises(ImportError, __import__, 'spamspam') + self.assertRaises(TypeError, __import__, 1, 2, 3, 4) + self.assertRaises(ValueError, __import__, '') + self.assertRaises(TypeError, __import__, 'sys', name='sys') + + def test_abs(self): + # int + self.assertEqual(abs(0), 0) + self.assertEqual(abs(1234), 1234) + self.assertEqual(abs(-1234), 1234) + 
self.assertTrue(abs(-sys.maxsize-1) > 0) + # float + self.assertEqual(abs(0.0), 0.0) + self.assertEqual(abs(3.14), 3.14) + self.assertEqual(abs(-3.14), 3.14) + # long + self.assertEqual(abs(0), 0) + self.assertEqual(abs(1234), 1234) + self.assertEqual(abs(-1234), 1234) + # str + self.assertRaises(TypeError, abs, 'a') + # bool + self.assertEqual(abs(True), 1) + self.assertEqual(abs(False), 0) + # other + self.assertRaises(TypeError, abs) + self.assertRaises(TypeError, abs, None) + class AbsClass(object): + def __abs__(self): + return -5 + self.assertEqual(abs(AbsClass()), -5) + + def test_all(self): + self.assertEqual(all([2, 4, 6]), True) + self.assertEqual(all([2, None, 6]), False) + # self.assertRaises(RuntimeError, all, [2, TestFailingBool(), 6]) + # self.assertRaises(RuntimeError, all, TestFailingIter()) + self.assertRaises(TypeError, all, 10) # Non-iterable + self.assertRaises(TypeError, all) # No args + self.assertRaises(TypeError, all, [2, 4, 6], []) # Too many args + self.assertEqual(all([]), True) # Empty iterator + self.assertEqual(all([0, TestFailingBool()]), False)# Short-circuit + S = [50, 60] + self.assertEqual(all(x > 42 for x in S), True) + S = [50, 40, 60] + self.assertEqual(all(x > 42 for x in S), False) + + def test_any(self): + self.assertEqual(any([None, None, None]), False) + self.assertEqual(any([None, 4, None]), True) + # self.assertRaises(RuntimeError, any, [None, TestFailingBool(), 6]) + # self.assertRaises(RuntimeError, any, TestFailingIter()) + self.assertRaises(TypeError, any, 10) # Non-iterable + self.assertRaises(TypeError, any) # No args + self.assertRaises(TypeError, any, [2, 4, 6], []) # Too many args + self.assertEqual(any([]), False) # Empty iterator + self.assertEqual(any([1, TestFailingBool()]), True) # Short-circuit + S = [40, 60, 30] + self.assertEqual(any(x > 42 for x in S), True) + S = [10, 20, 30] + self.assertEqual(any(x > 42 for x in S), False) + + def test_neg(self): + x = -sys.maxsize-1 + self.assertTrue(isinstance(x, 
int)) + self.assertEqual(-x, sys.maxsize+1) + + @expectedFailurePY3 + def test_apply(self): + def f0(*args): + self.assertEqual(args, ()) + def f1(a1): + self.assertEqual(a1, 1) + def f2(a1, a2): + self.assertEqual(a1, 1) + self.assertEqual(a2, 2) + def f3(a1, a2, a3): + self.assertEqual(a1, 1) + self.assertEqual(a2, 2) + self.assertEqual(a3, 3) + f0(*()) + f1(*(1,)) + f2(*(1, 2)) + f3(*(1, 2, 3)) + + # A PyCFunction that takes only positional parameters should allow an + # empty keyword dictionary to pass without a complaint, but raise a + # TypeError if the dictionary is non-empty. + id(*(1,), **{}) + self.assertRaises(TypeError, apply, id, (1,), {"foo": 1}) + self.assertRaises(TypeError, apply) + self.assertRaises(TypeError, apply, id, 42) + self.assertRaises(TypeError, apply, id, (42,), 42) + + def test_basestring(self): + assert isinstance('hello', basestring) + assert isinstance(b'hello', basestring) + + @expectedFailurePY3 + def test_callable(self): + self.assertTrue(callable(len)) + self.assertFalse(callable("a")) + self.assertTrue(callable(callable)) + self.assertTrue(callable(lambda x, y: x + y)) + self.assertFalse(callable(__builtins__)) + def f(): pass + self.assertTrue(callable(f)) + + class Classic: + def meth(self): pass + self.assertTrue(callable(Classic)) + c = Classic() + self.assertTrue(callable(c.meth)) + self.assertFalse(callable(c)) + + class NewStyle(object): + def meth(self): pass + self.assertTrue(callable(NewStyle)) + n = NewStyle() + self.assertTrue(callable(n.meth)) + self.assertFalse(callable(n)) + + # Classic and new-style classes evaluate __call__() differently + c.__call__ = None + self.assertTrue(callable(c)) + del c.__call__ + self.assertFalse(callable(c)) + n.__call__ = None + self.assertFalse(callable(n)) + del n.__call__ + self.assertFalse(callable(n)) + + class N2(object): + def __call__(self): pass + n2 = N2() + self.assertTrue(callable(n2)) + class N3(N2): pass + n3 = N3() + self.assertTrue(callable(n3)) + + 
@expectedFailurePY3 + def test_chr(self): + self.assertEqual(chr(32), ' ') + self.assertEqual(chr(65), 'A') + self.assertEqual(chr(97), 'a') + self.assertEqual(chr(0xff), '\xff') + self.assertRaises(ValueError, chr, 256) + self.assertRaises(TypeError, chr) + + @expectedFailurePY3 + def test_cmp(self): + self.assertEqual(cmp(-1, 1), -1) + self.assertEqual(cmp(1, -1), 1) + self.assertEqual(cmp(1, 1), 0) + # verify that circular objects are not handled + a = []; a.append(a) + b = []; b.append(b) + from UserList import UserList + c = UserList(); c.append(c) + self.assertRaises(RuntimeError, cmp, a, b) + self.assertRaises(RuntimeError, cmp, b, c) + self.assertRaises(RuntimeError, cmp, c, a) + self.assertRaises(RuntimeError, cmp, a, c) + # okay, now break the cycles + a.pop(); b.pop(); c.pop() + self.assertRaises(TypeError, cmp) + + @expectedFailurePY3 + def test_coerce(self): + self.assertTrue(not fcmp(coerce(1, 1.1), (1.0, 1.1))) + self.assertEqual(coerce(1, 1), (1, 1)) + self.assertTrue(not fcmp(coerce(1, 1.1), (1.0, 1.1))) + self.assertRaises(TypeError, coerce) + class BadNumber: + def __coerce__(self, other): + raise ValueError + self.assertRaises(ValueError, coerce, 42, BadNumber()) + self.assertRaises(OverflowError, coerce, 0.5, int("12345" * 1000)) + + @expectedFailurePY3 + def test_compile(self): + compile('print(1)\n', '', 'exec') + bom = '\xef\xbb\xbf' + compile(bom + 'print(1)\n', '', 'exec') + compile(source='pass', filename='?', mode='exec') + compile(dont_inherit=0, filename='tmp', source='0', mode='eval') + compile('pass', '?', dont_inherit=1, mode='exec') + self.assertRaises(TypeError, compile) + self.assertRaises(ValueError, compile, 'print(42)\n', '', 'badmode') + self.assertRaises(ValueError, compile, 'print(42)\n', '', 'single', 0xff) + self.assertRaises(TypeError, compile, chr(0), 'f', 'exec') + self.assertRaises(TypeError, compile, 'pass', '?', 'exec', + mode='eval', source='0', filename='tmp') + if True: # Was: if have_unicode: + 
compile(unicode('print(u"\xc3\xa5"\n)', 'utf8'), '', 'exec') + self.assertRaises(TypeError, compile, unichr(0), 'f', 'exec') + self.assertRaises(ValueError, compile, unicode('a = 1'), 'f', 'bad') + + + def test_delattr(self): + import sys + sys.spam = 1 + delattr(sys, 'spam') + self.assertRaises(TypeError, delattr) + + def test_dir(self): + # dir(wrong number of arguments) + self.assertRaises(TypeError, dir, 42, 42) + + # dir() - local scope + local_var = 1 + self.assertIn('local_var', dir()) + + # dir(module) + import sys + self.assertIn('exit', dir(sys)) + + # dir(module_with_invalid__dict__) + import types + class Foo(types.ModuleType): + __dict__ = 8 + f = Foo("foo") + self.assertRaises(TypeError, dir, f) + + # dir(type) + self.assertIn("strip", dir(str)) + self.assertNotIn("__mro__", dir(str)) + + # dir(obj) + class Foo(object): + def __init__(self): + self.x = 7 + self.y = 8 + self.z = 9 + f = Foo() + self.assertIn("y", dir(f)) + + # dir(obj_no__dict__) + class Foo(object): + __slots__ = [] + f = Foo() + self.assertIn("__repr__", dir(f)) + + # dir(obj_no__class__with__dict__) + # (an ugly trick to cause getattr(f, "__class__") to fail) + class Foo(object): + __slots__ = ["__class__", "__dict__"] + def __init__(self): + self.bar = "wow" + f = Foo() + self.assertNotIn("__repr__", dir(f)) + self.assertIn("bar", dir(f)) + + # dir(obj_using __dir__) + class Foo(object): + def __dir__(self): + return ["kan", "ga", "roo"] + f = Foo() + self.assertTrue(dir(f) == ["ga", "kan", "roo"]) + + # dir(obj__dir__not_list) + class Foo(object): + def __dir__(self): + return 7 + f = Foo() + self.assertRaises(TypeError, dir, f) + + def test_divmod(self): + self.assertEqual(divmod(12, 7), (1, 5)) + self.assertEqual(divmod(-12, 7), (-2, 2)) + self.assertEqual(divmod(12, -7), (-2, -2)) + self.assertEqual(divmod(-12, -7), (1, -5)) + + self.assertEqual(divmod(12, 7), (1, 5)) + self.assertEqual(divmod(-12, 7), (-2, 2)) + self.assertEqual(divmod(12, -7), (-2, -2)) + 
self.assertEqual(divmod(-12, -7), (1, -5)) + + self.assertEqual(divmod(12, 7), (1, 5)) + self.assertEqual(divmod(-12, 7), (-2, 2)) + self.assertEqual(divmod(12, -7), (-2, -2)) + self.assertEqual(divmod(-12, -7), (1, -5)) + + self.assertEqual(divmod(-sys.maxsize-1, -1), + (sys.maxsize+1, 0)) + + self.assertTrue(not fcmp(divmod(3.25, 1.0), (3.0, 0.25))) + self.assertTrue(not fcmp(divmod(-3.25, 1.0), (-4.0, 0.75))) + self.assertTrue(not fcmp(divmod(3.25, -1.0), (-4.0, -0.75))) + self.assertTrue(not fcmp(divmod(-3.25, -1.0), (3.0, -0.25))) + + self.assertRaises(TypeError, divmod) + + @expectedFailurePY3 + def test_eval(self): + self.assertEqual(eval('1+1'), 2) + self.assertEqual(eval(' 1+1\n'), 2) + globals = {'a': 1, 'b': 2} + locals = {'b': 200, 'c': 300} + self.assertEqual(eval('a', globals) , 1) + self.assertEqual(eval('a', globals, locals), 1) + self.assertEqual(eval('b', globals, locals), 200) + self.assertEqual(eval('c', globals, locals), 300) + if True: # Was: if have_unicode: + self.assertEqual(eval(unicode('1+1')), 2) + self.assertEqual(eval(unicode(' 1+1\n')), 2) + globals = {'a': 1, 'b': 2} + locals = {'b': 200, 'c': 300} + if True: # Was: if have_unicode: + self.assertEqual(eval(unicode('a'), globals), 1) + self.assertEqual(eval(unicode('a'), globals, locals), 1) + self.assertEqual(eval(unicode('b'), globals, locals), 200) + self.assertEqual(eval(unicode('c'), globals, locals), 300) + bom = '\xef\xbb\xbf' + self.assertEqual(eval(bom + 'a', globals, locals), 1) + self.assertEqual(eval(unicode('u"\xc3\xa5"', 'utf8'), globals), + unicode('\xc3\xa5', 'utf8')) + self.assertRaises(TypeError, eval) + self.assertRaises(TypeError, eval, ()) + + @expectedFailurePY3 + def test_general_eval(self): + # Tests that general mappings can be used for the locals argument + + class M: + "Test mapping interface versus possible calls from eval()." 
+ def __getitem__(self, key): + if key == 'a': + return 12 + raise KeyError + def keys(self): + return list('xyz') + + m = M() + g = globals() + self.assertEqual(eval('a', g, m), 12) + self.assertRaises(NameError, eval, 'b', g, m) + self.assertEqual(eval('dir()', g, m), list('xyz')) + self.assertEqual(eval('globals()', g, m), g) + self.assertEqual(eval('locals()', g, m), m) + self.assertRaises(TypeError, eval, 'a', m) + class A: + "Non-mapping" + pass + m = A() + self.assertRaises(TypeError, eval, 'a', g, m) + + # Verify that dict subclasses work as well + class D(dict): + def __getitem__(self, key): + if key == 'a': + return 12 + return dict.__getitem__(self, key) + def keys(self): + return list('xyz') + + d = D() + self.assertEqual(eval('a', g, d), 12) + self.assertRaises(NameError, eval, 'b', g, d) + self.assertEqual(eval('dir()', g, d), list('xyz')) + self.assertEqual(eval('globals()', g, d), g) + self.assertEqual(eval('locals()', g, d), d) + + # Verify locals stores (used by list comps) + eval('[locals() for i in (2,3)]', g, d) + # eval('[locals() for i in (2,3)]', g, UserDict.UserDict()) + + class SpreadSheet: + "Sample application showing nested, calculated lookups." + _cells = {} + def __setitem__(self, key, formula): + self._cells[key] = formula + def __getitem__(self, key): + return eval(self._cells[key], globals(), self) + + ss = SpreadSheet() + ss['a1'] = '5' + ss['a2'] = 'a1*6' + ss['a3'] = 'a2*7' + self.assertEqual(ss['a3'], 210) + + # Verify that dir() catches a non-list returned by eval + # SF bug #1004669 + class C: + def __getitem__(self, item): + raise KeyError(item) + def keys(self): + return 'a' + self.assertRaises(TypeError, eval, 'dir()', globals(), C()) + + # Done outside of the method test_z to get the correct scope + z = 0 + f = open(TESTFN, 'w') + f.write('z = z+1\n') + f.write('z = z*2\n') + f.close() + if True: + # with check_py3k_warnings(("execfile.. 
not supported in 3.x", + # DeprecationWarning)): + execfile(TESTFN) + + def test_execfile(self): + global numruns + if numruns: + return + numruns += 1 + + globals = {'a': 1, 'b': 2} + locals = {'b': 200, 'c': 300} + + self.assertEqual(self.__class__.z, 2) + globals['z'] = 0 + execfile(TESTFN, globals) + self.assertEqual(globals['z'], 2) + locals['z'] = 0 + execfile(TESTFN, globals, locals) + self.assertEqual(locals['z'], 2) + + # This test only works if we pass in a Mapping type. + class M(dict): + "Test mapping interface versus possible calls from execfile()." + def __init__(self): + self.z = 10 + def __getitem__(self, key): + if key == 'z': + return self.z + raise KeyError + def __setitem__(self, key, value): + if key == 'z': + self.z = value + return + raise KeyError + + locals = M() + locals['z'] = 0 + execfile(TESTFN, globals, locals) + self.assertEqual(locals['z'], 2) + + unlink(TESTFN) + self.assertRaises(TypeError, execfile) + self.assertRaises(TypeError, execfile, TESTFN, {}, ()) + import os + self.assertRaises(IOError, execfile, os.curdir) + self.assertRaises(IOError, execfile, "I_dont_exist") + + @expectedFailurePY3 + def test_filter(self): + self.assertEqual(filter(lambda c: 'a' <= c <= 'z', 'Hello World'), 'elloorld') + self.assertEqual(filter(None, [1, 'hello', [], [3], '', None, 9, 0]), [1, 'hello', [3], 9]) + self.assertEqual(filter(lambda x: x > 0, [1, -3, 9, 0, 2]), [1, 9, 2]) + self.assertEqual(filter(None, Squares(10)), [1, 4, 9, 16, 25, 36, 49, 64, 81]) + self.assertEqual(filter(lambda x: x%2, Squares(10)), [1, 9, 25, 49, 81]) + def identity(item): + return 1 + filter(identity, Squares(5)) + self.assertRaises(TypeError, filter) + class BadSeq(object): + def __getitem__(self, index): + if index<4: + return 42 + raise ValueError + self.assertRaises(ValueError, filter, lambda x: x, BadSeq()) + def badfunc(): + pass + self.assertRaises(TypeError, filter, badfunc, range(5)) + + # test bltinmodule.c::filtertuple() + self.assertEqual(filter(None, (1, 
2)), (1, 2)) + self.assertEqual(filter(lambda x: x>=3, (1, 2, 3, 4)), (3, 4)) + self.assertRaises(TypeError, filter, 42, (1, 2)) + + # test bltinmodule.c::filterstring() + self.assertEqual(filter(None, "12"), "12") + self.assertEqual(filter(lambda x: x>="3", "1234"), "34") + self.assertRaises(TypeError, filter, 42, "12") + class badstr(str): + def __getitem__(self, index): + raise ValueError + self.assertRaises(ValueError, filter, lambda x: x >="3", badstr("1234")) + + class badstr2(str): + def __getitem__(self, index): + return 42 + self.assertRaises(TypeError, filter, lambda x: x >=42, badstr2("1234")) + + class weirdstr(str): + def __getitem__(self, index): + return weirdstr(2*str.__getitem__(self, index)) + self.assertEqual(filter(lambda x: x>="33", weirdstr("1234")), "3344") + + class shiftstr(str): + def __getitem__(self, index): + return chr(ord(str.__getitem__(self, index))+1) + self.assertEqual(filter(lambda x: x>="3", shiftstr("1234")), "345") + + if True: # Was: if have_unicode: + # test bltinmodule.c::filterunicode() + self.assertEqual(filter(None, unicode("12")), unicode("12")) + self.assertEqual(filter(lambda x: x>="3", unicode("1234")), unicode("34")) + self.assertRaises(TypeError, filter, 42, unicode("12")) + self.assertRaises(ValueError, filter, lambda x: x >="3", badstr(unicode("1234"))) + + class badunicode(unicode): + def __getitem__(self, index): + return 42 + self.assertRaises(TypeError, filter, lambda x: x >=42, badunicode("1234")) + + class weirdunicode(unicode): + def __getitem__(self, index): + return weirdunicode(2*unicode.__getitem__(self, index)) + self.assertEqual( + filter(lambda x: x>=unicode("33"), weirdunicode("1234")), unicode("3344")) + + class shiftunicode(unicode): + def __getitem__(self, index): + return unichr(ord(unicode.__getitem__(self, index))+1) + self.assertEqual( + filter(lambda x: x>=unicode("3"), shiftunicode("1234")), + unicode("345") + ) + + @expectedFailurePY3 + def test_filter_subclasses(self): + # test that 
filter() never returns tuple, str or unicode subclasses + # and that the result always goes through __getitem__ + funcs = (None, bool, lambda x: True) + class tuple2(tuple): + def __getitem__(self, index): + return 2*tuple.__getitem__(self, index) + class str2(str): + def __getitem__(self, index): + return 2*str.__getitem__(self, index) + inputs = { + tuple2: {(): (), (1, 2, 3): (2, 4, 6)}, + str2: {"": "", "123": "112233"} + } + if True: # Was: if have_unicode: + class unicode2(unicode): + def __getitem__(self, index): + return 2*unicode.__getitem__(self, index) + inputs[unicode2] = { + unicode(): unicode(), + unicode("123"): unicode("112233") + } + + for (cls, inps) in inputs.items(): + for (inp, exp) in inps.items(): + # make sure the output goes through __getitem__ + # even if func is None + self.assertEqual( + filter(funcs[0], cls(inp)), + filter(funcs[1], cls(inp)) + ) + for func in funcs: + outp = filter(func, cls(inp)) + self.assertEqual(outp, exp) + self.assertTrue(not isinstance(outp, cls)) + + @expectedFailurePY3 + def test_getattr(self): + import sys + self.assertTrue(getattr(sys, 'stdout') is sys.stdout) + self.assertRaises(TypeError, getattr, sys, 1) + self.assertRaises(TypeError, getattr, sys, 1, "foo") + self.assertRaises(TypeError, getattr) + if True: # Was: have_unicode: + self.assertRaises(UnicodeError, getattr, sys, unichr(sys.maxunicode)) + + @expectedFailurePY3 + def test_hasattr(self): + import sys + self.assertTrue(hasattr(sys, 'stdout')) + self.assertRaises(TypeError, hasattr, sys, 1) + self.assertRaises(TypeError, hasattr) + if True: # Was: if have_unicode: + self.assertRaises(UnicodeError, hasattr, sys, unichr(sys.maxunicode)) + + # Check that hasattr allows SystemExit and KeyboardInterrupts by + class A: + def __getattr__(self, what): + raise KeyboardInterrupt + self.assertRaises(KeyboardInterrupt, hasattr, A(), "b") + class B: + def __getattr__(self, what): + raise SystemExit + self.assertRaises(SystemExit, hasattr, B(), "b") + + def 
test_hash(self): + hash(None) + self.assertEqual(hash(1), hash(1)) + self.assertEqual(hash(1), hash(1.0)) + hash('spam') + if True: # Was: if have_unicode: + self.assertEqual(hash('spam'), hash(unicode('spam'))) + hash((0,1,2,3)) + def f(): pass + self.assertRaises(TypeError, hash, []) + self.assertRaises(TypeError, hash, {}) + # Bug 1536021: Allow hash to return long objects + class X: + def __hash__(self): + return 2**100 + self.assertEqual(type(hash(X())), int) + class Y(object): + def __hash__(self): + return 2**100 + self.assertEqual(type(hash(Y())), int) + class Z(long): + def __hash__(self): + return self + self.assertEqual(hash(Z(42)), hash(42)) + + def test_hex(self): + self.assertEqual(hex(16), '0x10') + # self.assertEqual(hex(16L), '0x10L') + self.assertEqual(hex(-16), '-0x10') + # self.assertEqual(hex(-16L), '-0x10L') + self.assertRaises(TypeError, hex, {}) + + def test_id(self): + id(None) + id(1) + id(1) + id(1.0) + id('spam') + id((0,1,2,3)) + id([0,1,2,3]) + id({'spam': 1, 'eggs': 2, 'ham': 3}) + + # Test input() later, together with raw_input + + # test_int(): see test_int.py for int() tests. + + @expectedFailurePY3 + def test_intern(self): + self.assertRaises(TypeError, intern) + # This fails if the test is run twice with a constant string, + # therefore append the run counter + s = "never interned before " + str(numruns) + self.assertTrue(intern(s) is s) + s2 = s.swapcase().swapcase() + self.assertTrue(intern(s2) is s) + + # Subclasses of string can't be interned, because they + # provide too much opportunity for insane things to happen. + # We don't want them in the interned dict and if they aren't + # actually interned, we don't want to create the appearance + # that they are by allowing intern() to succeed. + class S(str): + def __hash__(self): + return 123 + + self.assertRaises(TypeError, intern, S("abc")) + + # It's still safe to pass these strings to routines that + # call intern internally, e.g. PyObject_SetAttr(). 
+ s = S("abc") + setattr(s, s, s) + self.assertEqual(getattr(s, s), s) + + @expectedFailurePY3 + def test_iter(self): + self.assertRaises(TypeError, iter) + self.assertRaises(TypeError, iter, 42, 42) + lists = [("1", "2"), ["1", "2"], "12"] + if True: # Was: if have_unicode: + lists.append(unicode("12")) + for l in lists: + i = iter(l) + self.assertEqual(i.next(), '1') + self.assertEqual(i.next(), '2') + self.assertRaises(StopIteration, i.next) + + def test_isinstance(self): + class C: + pass + class D(C): + pass + class E: + pass + c = C() + d = D() + e = E() + self.assertTrue(isinstance(c, C)) + self.assertTrue(isinstance(d, C)) + self.assertTrue(not isinstance(e, C)) + self.assertTrue(not isinstance(c, D)) + self.assertTrue(not isinstance('foo', E)) + self.assertRaises(TypeError, isinstance, E, 'foo') + self.assertRaises(TypeError, isinstance) + + def test_issubclass(self): + class C: + pass + class D(C): + pass + class E: + pass + c = C() + d = D() + e = E() + self.assertTrue(issubclass(D, C)) + self.assertTrue(issubclass(C, C)) + self.assertTrue(not issubclass(C, D)) + self.assertRaises(TypeError, issubclass, 'foo', E) + self.assertRaises(TypeError, issubclass, E, 'foo') + self.assertRaises(TypeError, issubclass) + + @expectedFailurePY3 + def test_len(self): + self.assertEqual(len('123'), 3) + self.assertEqual(len(()), 0) + self.assertEqual(len((1, 2, 3, 4)), 4) + self.assertEqual(len([1, 2, 3, 4]), 4) + self.assertEqual(len({}), 0) + self.assertEqual(len({'a':1, 'b': 2}), 2) + class BadSeq: + def __len__(self): + raise ValueError + self.assertRaises(ValueError, len, BadSeq()) + self.assertRaises(TypeError, len, 2) + class ClassicStyle: pass + class NewStyle(object): pass + self.assertRaises(AttributeError, len, ClassicStyle()) + self.assertRaises(TypeError, len, NewStyle()) + + def test_map(self): + self.assertEqual( + map(None, 'hello world'), + ['h','e','l','l','o',' ','w','o','r','l','d'] + ) + self.assertEqual( + map(None, 'abcd', 'efg'), + [('a', 'e'), 
('b', 'f'), ('c', 'g'), ('d', None)] + ) + self.assertEqual( + map(None, range(10)), + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + ) + self.assertEqual( + map(lambda x: x*x, range(1,4)), + [1, 4, 9] + ) + try: + from math import sqrt + except ImportError: + def sqrt(x): + return pow(x, 0.5) + self.assertEqual( + map(lambda x: map(sqrt,x), [[16, 4], [81, 9]]), + [[4.0, 2.0], [9.0, 3.0]] + ) + self.assertEqual( + map(lambda x, y: x+y, [1,3,2], [9,1,4]), + [10, 4, 6] + ) + + def plus(*v): + accu = 0 + for i in v: accu = accu + i + return accu + self.assertEqual( + map(plus, [1, 3, 7]), + [1, 3, 7] + ) + self.assertEqual( + map(plus, [1, 3, 7], [4, 9, 2]), + [1+4, 3+9, 7+2] + ) + self.assertEqual( + map(plus, [1, 3, 7], [4, 9, 2], [1, 1, 0]), + [1+4+1, 3+9+1, 7+2+0] + ) + self.assertEqual( + map(None, Squares(10)), + [0, 1, 4, 9, 16, 25, 36, 49, 64, 81] + ) + self.assertEqual( + map(int, Squares(10)), + [0, 1, 4, 9, 16, 25, 36, 49, 64, 81] + ) + self.assertEqual( + map(None, Squares(3), Squares(2)), + [(0,0), (1,1), (4,None)] + ) + # This fails on Py3: + # self.assertEqual( + # map(max, Squares(3), Squares(2)), + # [0, 1, 4] + # ) + self.assertRaises(TypeError, map) + self.assertRaises(TypeError, map, lambda x: x, 42) + self.assertEqual(map(None, [42]), [42]) + class BadSeq: + def __getitem__(self, index): + raise ValueError + self.assertRaises(ValueError, map, lambda x: x, BadSeq()) + def badfunc(x): + raise RuntimeError + self.assertRaises(RuntimeError, map, badfunc, range(5)) + + def test_max(self): + self.assertEqual(max('123123'), '3') + self.assertEqual(max(1, 2, 3), 3) + self.assertEqual(max((1, 2, 3, 1, 2, 3)), 3) + self.assertEqual(max([1, 2, 3, 1, 2, 3]), 3) + + self.assertEqual(max(1, 2, 3.0), 3.0) + self.assertEqual(max(1, 2.0, 3), 3) + self.assertEqual(max(1.0, 2, 3), 3) + + for stmt in ( + "max(key=int)", # no args + "max(1, key=int)", # single arg not iterable + "max(1, 2, keystone=int)", # wrong keyword + "max(1, 2, key=int, abc=int)", # two many keywords + 
"max(1, 2, key=1)", # keyfunc is not callable + ): + try: + exec(stmt) in globals() + except TypeError: + pass + else: + self.fail(stmt) + + self.assertEqual(max((1,), key=neg), 1) # one elem iterable + self.assertEqual(max((1,2), key=neg), 1) # two elem iterable + self.assertEqual(max(1, 2, key=neg), 1) # two elems + + data = [random.randrange(200) for i in range(100)] + keys = dict((elem, random.randrange(50)) for elem in data) + f = keys.__getitem__ + self.assertEqual(max(data, key=f), + sorted(reversed(data), key=f)[-1]) + + @expectedFailurePY3 + def test_min(self): + self.assertEqual(min('123123'), '1') + self.assertEqual(min(1, 2, 3), 1) + self.assertEqual(min((1, 2, 3, 1, 2, 3)), 1) + self.assertEqual(min([1, 2, 3, 1, 2, 3]), 1) + + self.assertEqual(min(1, 2, 3.0), 1) + self.assertEqual(min(1, 2.0, 3), 1) + self.assertEqual(min(1.0, 2, 3), 1.0) + + self.assertRaises(TypeError, min) + self.assertRaises(TypeError, min, 42) + self.assertRaises(ValueError, min, ()) + class BadSeq: + def __getitem__(self, index): + raise ValueError + self.assertRaises(ValueError, min, BadSeq()) + class BadNumber: + def __cmp__(self, other): + raise ValueError + self.assertRaises(ValueError, min, (42, BadNumber())) + + for stmt in ( + "min(key=int)", # no args + "min(1, key=int)", # single arg not iterable + "min(1, 2, keystone=int)", # wrong keyword + "min(1, 2, key=int, abc=int)", # two many keywords + "min(1, 2, key=1)", # keyfunc is not callable + ): + try: + exec(stmt) in globals() + except TypeError: + pass + else: + self.fail(stmt) + + self.assertEqual(min((1,), key=neg), 1) # one elem iterable + self.assertEqual(min((1,2), key=neg), 2) # two elem iterable + self.assertEqual(min(1, 2, key=neg), 2) # two elems + + data = [random.randrange(200) for i in range(100)] + keys = dict((elem, random.randrange(50)) for elem in data) + f = keys.__getitem__ + self.assertEqual(min(data, key=f), + sorted(data, key=f)[0]) + + @expectedFailurePY3 + def test_next(self): + it = 
iter(range(2)) + self.assertEqual(next(it), 0) + self.assertEqual(next(it), 1) + self.assertRaises(StopIteration, next, it) + self.assertRaises(StopIteration, next, it) + self.assertEqual(next(it, 42), 42) + + class Iter(object): + def __iter__(self): + return self + def next(self): + raise StopIteration + + it = iter(Iter()) + self.assertEqual(next(it, 42), 42) + self.assertRaises(StopIteration, next, it) + + def gen(): + yield 1 + return + + it = gen() + self.assertEqual(next(it), 1) + self.assertRaises(StopIteration, next, it) + self.assertEqual(next(it, 42), 42) + + @expectedFailurePY3 + def test_oct(self): + self.assertEqual(oct(100), '0144') + # self.assertEqual(oct(100L), '0144L') + self.assertEqual(oct(-100), '-0144') + # self.assertEqual(oct(-100L), '-0144L') + self.assertRaises(TypeError, oct, ()) + + def write_testfile(self): + # NB the first 4 lines are also used to test input and raw_input, below + fp = open(TESTFN, 'w') + try: + fp.write('1+1\n') + fp.write('1+1\n') + fp.write('The quick brown fox jumps over the lazy dog') + fp.write('.\n') + fp.write('Dear John\n') + fp.write('XXX'*100) + fp.write('YYY'*100) + finally: + fp.close() + + def test_open(self): + self.write_testfile() + fp = open(TESTFN, 'r') + try: + self.assertEqual(fp.readline(4), '1+1\n') + self.assertEqual(fp.readline(4), '1+1\n') + self.assertEqual(fp.readline(), 'The quick brown fox jumps over the lazy dog.\n') + self.assertEqual(fp.readline(4), 'Dear') + self.assertEqual(fp.readline(100), ' John\n') + self.assertEqual(fp.read(300), 'XXX'*100) + self.assertEqual(fp.read(1000), 'YYY'*100) + finally: + fp.close() + unlink(TESTFN) + + @expectedFailurePY3 + def test_ord(self): + self.assertEqual(ord(' '), 32) + self.assertEqual(ord('A'), 65) + self.assertEqual(ord('a'), 97) + if True: # Was: if have_unicode: + self.assertEqual(ord(unichr(sys.maxunicode)), sys.maxunicode) + self.assertRaises(TypeError, ord, 42) + if True: # Was: if have_unicode: + self.assertRaises(TypeError, ord, 
unicode("12")) + + @expectedFailurePY3 + def test_pow(self): + self.assertEqual(pow(0,0), 1) + self.assertEqual(pow(0,1), 0) + self.assertEqual(pow(1,0), 1) + self.assertEqual(pow(1,1), 1) + + self.assertEqual(pow(2,0), 1) + self.assertEqual(pow(2,10), 1024) + self.assertEqual(pow(2,20), 1024*1024) + self.assertEqual(pow(2,30), 1024*1024*1024) + + self.assertEqual(pow(-2,0), 1) + self.assertEqual(pow(-2,1), -2) + self.assertEqual(pow(-2,2), 4) + self.assertEqual(pow(-2,3), -8) + + self.assertEqual(pow(0,0), 1) + self.assertEqual(pow(0,1), 0) + self.assertEqual(pow(1,0), 1) + self.assertEqual(pow(1,1), 1) + + self.assertEqual(pow(2,0), 1) + self.assertEqual(pow(2,10), 1024) + self.assertEqual(pow(2,20), 1024*1024) + self.assertEqual(pow(2,30), 1024*1024*1024) + + self.assertEqual(pow(-2,0), 1) + self.assertEqual(pow(-2,1), -2) + self.assertEqual(pow(-2,2), 4) + self.assertEqual(pow(-2,3), -8) + + self.assertAlmostEqual(pow(0.,0), 1.) + self.assertAlmostEqual(pow(0.,1), 0.) + self.assertAlmostEqual(pow(1.,0), 1.) + self.assertAlmostEqual(pow(1.,1), 1.) + + self.assertAlmostEqual(pow(2.,0), 1.) + self.assertAlmostEqual(pow(2.,10), 1024.) + self.assertAlmostEqual(pow(2.,20), 1024.*1024.) + self.assertAlmostEqual(pow(2.,30), 1024.*1024.*1024.) + + self.assertAlmostEqual(pow(-2.,0), 1.) + self.assertAlmostEqual(pow(-2.,1), -2.) + self.assertAlmostEqual(pow(-2.,2), 4.) + self.assertAlmostEqual(pow(-2.,3), -8.) 
+ + for x in 2, 2, 2.0: + for y in 10, 10, 10.0: + for z in 1000, 1000, 1000.0: + if isinstance(x, float) or \ + isinstance(y, float) or \ + isinstance(z, float): + self.assertRaises(TypeError, pow, x, y, z) + else: + self.assertAlmostEqual(pow(x, y, z), 24.0) + + self.assertRaises(TypeError, pow, -1, -2, 3) + self.assertRaises(ValueError, pow, 1, 2, 0) + self.assertRaises(TypeError, pow, -1, -2, 3) + self.assertRaises(ValueError, pow, 1, 2, 0) + # Will return complex in 3.0: + self.assertRaises(ValueError, pow, -342.43, 0.234) + + self.assertRaises(TypeError, pow) + + @skip26 + @expectedFailurePY3 + def test_range(self): + self.assertEqual(range(3), [0, 1, 2]) + self.assertEqual(range(1, 5), [1, 2, 3, 4]) + self.assertEqual(range(0), []) + self.assertEqual(range(-3), []) + self.assertEqual(range(1, 10, 3), [1, 4, 7]) + self.assertEqual(range(5, -5, -3), [5, 2, -1, -4]) + + # Now test range() with longs + self.assertEqual(range(-2**100), []) + self.assertEqual(range(0, -2**100), []) + self.assertEqual(range(0, 2**100, -1), []) + self.assertEqual(range(0, 2**100, -1), []) + + a = long(10 * sys.maxsize) + b = long(100 * sys.maxsize) + c = long(50 * sys.maxsize) + + self.assertEqual(range(a, a+2), [a, a+1]) + self.assertEqual(range(a+2, a, -1), [a+2, a+1]) + self.assertEqual(range(a+4, a, -2), [a+4, a+2]) + + seq = range(a, b, c) + self.assertIn(a, seq) + self.assertNotIn(b, seq) + self.assertEqual(len(seq), 2) + + seq = range(b, a, -c) + self.assertIn(b, seq) + self.assertNotIn(a, seq) + self.assertEqual(len(seq), 2) + + seq = range(-a, -b, -c) + self.assertIn(-a, seq) + self.assertNotIn(-b, seq) + self.assertEqual(len(seq), 2) + + self.assertRaises(TypeError, range) + self.assertRaises(TypeError, range, 1, 2, 3, 4) + self.assertRaises(ValueError, range, 1, 2, 0) + self.assertRaises(ValueError, range, a, a + 1, long(0)) + + class badzero(int): + def __cmp__(self, other): + raise RuntimeError + __hash__ = None # Invalid cmp makes this unhashable + 
self.assertRaises(RuntimeError, range, a, a + 1, badzero(1)) + + # Reject floats. + self.assertRaises(TypeError, range, 1., 1., 1.) + self.assertRaises(TypeError, range, 1e100, 1e101, 1e101) + + self.assertRaises(TypeError, range, 0, "spam") + self.assertRaises(TypeError, range, 0, 42, "spam") + + self.assertRaises(OverflowError, range, -sys.maxsize, sys.maxsize) + self.assertRaises(OverflowError, range, 0, 2*sys.maxsize) + + bignum = 2*sys.maxsize + smallnum = 42 + # Old-style user-defined class with __int__ method + class I0: + def __init__(self, n): + self.n = int(n) + def __int__(self): + return self.n + self.assertEqual(range(I0(bignum), I0(bignum + 1)), [bignum]) + self.assertEqual(range(I0(smallnum), I0(smallnum + 1)), [smallnum]) + + # New-style user-defined class with __int__ method + class I1(object): + def __init__(self, n): + self.n = int(n) + def __int__(self): + return self.n + self.assertEqual(range(I1(bignum), I1(bignum + 1)), [bignum]) + self.assertEqual(range(I1(smallnum), I1(smallnum + 1)), [smallnum]) + + # New-style user-defined class with failing __int__ method + class IX(object): + def __int__(self): + raise RuntimeError + self.assertRaises(RuntimeError, range, IX()) + + # New-style user-defined class with invalid __int__ method + class IN(object): + def __int__(self): + return "not a number" + self.assertRaises(TypeError, range, IN()) + + # Exercise various combinations of bad arguments, to check + # refcounting logic + self.assertRaises(TypeError, range, 0.0) + + self.assertRaises(TypeError, range, 0, 0.0) + self.assertRaises(TypeError, range, 0.0, 0) + self.assertRaises(TypeError, range, 0.0, 0.0) + + self.assertRaises(TypeError, range, 0, 0, 1.0) + self.assertRaises(TypeError, range, 0, 0.0, 1) + self.assertRaises(TypeError, range, 0, 0.0, 1.0) + self.assertRaises(TypeError, range, 0.0, 0, 1) + self.assertRaises(TypeError, range, 0.0, 0, 1.0) + self.assertRaises(TypeError, range, 0.0, 0.0, 1) + self.assertRaises(TypeError, range, 0.0, 
0.0, 1.0) + + @expectedFailurePY3 + def test_input_and_raw_input(self): + self.write_testfile() + fp = open(TESTFN, 'r') + savestdin = sys.stdin + savestdout = sys.stdout # Eats the echo + try: + sys.stdin = fp + sys.stdout = BitBucket() + self.assertEqual(input(), 2) + self.assertEqual(input('testing\n'), 2) + self.assertEqual(raw_input(), 'The quick brown fox jumps over the lazy dog.') + self.assertEqual(raw_input('testing\n'), 'Dear John') + + # SF 1535165: don't segfault on closed stdin + # sys.stdout must be a regular file for triggering + sys.stdout = savestdout + sys.stdin.close() + self.assertRaises(ValueError, input) + + sys.stdout = BitBucket() + sys.stdin = io.BytesIO(b"NULL\0") + self.assertRaises(TypeError, input, 42, 42) + sys.stdin = io.BytesIO(b" 'whitespace'") + self.assertEqual(input(), 'whitespace') + sys.stdin = io.BytesIO() + self.assertRaises(EOFError, input) + + # SF 876178: make sure input() respect future options. + sys.stdin = io.BytesIO(b'1/2') + sys.stdout = io.BytesIO() + exec(compile('print(input())', 'test_builtin_tmp', 'exec')) + sys.stdin.seek(0, 0) + exec(compile('from __future__ import division;print(input())', + 'test_builtin_tmp', 'exec')) + sys.stdin.seek(0, 0) + exec(compile('print(input())', 'test_builtin_tmp', 'exec')) + # The result we expect depends on whether new division semantics + # are already in effect. + if 1/2 == 0: + # This test was compiled with old semantics. + expected = ['0', '0.5', '0'] + else: + # This test was compiled with new semantics (e.g., -Qnew + # was given on the command line. 
+ expected = ['0.5', '0.5', '0.5'] + self.assertEqual(sys.stdout.getvalue().splitlines(), expected) + + del sys.stdout + self.assertRaises(RuntimeError, input, 'prompt') + del sys.stdin + self.assertRaises(RuntimeError, input, 'prompt') + finally: + sys.stdin = savestdin + sys.stdout = savestdout + fp.close() + unlink(TESTFN) + + def test_reduce(self): + add = lambda x, y: x+y + self.assertEqual(reduce(add, ['a', 'b', 'c'], ''), 'abc') + self.assertEqual( + reduce(add, [['a', 'c'], [], ['d', 'w']], []), + ['a','c','d','w'] + ) + self.assertEqual(reduce(lambda x, y: x*y, range(2,8), 1), 5040) + self.assertEqual( + reduce(lambda x, y: x*y, range(2,21), 1), + 2432902008176640000 + ) + self.assertEqual(reduce(add, Squares(10)), 285) + self.assertEqual(reduce(add, Squares(10), 0), 285) + self.assertEqual(reduce(add, Squares(0), 0), 0) + self.assertRaises(TypeError, reduce) + self.assertRaises(TypeError, reduce, 42) + self.assertRaises(TypeError, reduce, 42, 42) + self.assertRaises(TypeError, reduce, 42, 42, 42) + self.assertRaises(TypeError, reduce, None, range(5)) + self.assertRaises(TypeError, reduce, add, 42) + self.assertEqual(reduce(42, "1"), "1") # func is never called with one item + self.assertEqual(reduce(42, "", "1"), "1") # func is never called with one item + self.assertRaises(TypeError, reduce, 42, (42, 42)) + self.assertRaises(TypeError, reduce, add, []) # arg 2 must not be empty sequence with no initial value + self.assertRaises(TypeError, reduce, add, "") + self.assertRaises(TypeError, reduce, add, ()) + self.assertEqual(reduce(add, [], None), None) + self.assertEqual(reduce(add, [], 42), 42) + + class BadSeq: + def __getitem__(self, index): + raise ValueError + self.assertRaises(ValueError, reduce, 42, BadSeq()) + + def test_reload(self): + import marshal + reload(marshal) + import string + reload(string) + ## import sys + ## self.assertRaises(ImportError, reload, sys) + + def test_repr(self): + self.assertEqual(repr(''), '\'\'') + 
self.assertEqual(repr(0), '0') + # self.assertEqual(repr(0L), '0L') + self.assertEqual(repr(()), '()') + self.assertEqual(repr([]), '[]') + self.assertEqual(repr({}), '{}') + a = [] + a.append(a) + self.assertEqual(repr(a), '[[...]]') + a = {} + a[0] = a + self.assertEqual(repr(a), '{0: {...}}') + + @expectedFailurePY3 + def test_round(self): + self.assertEqual(round(0.0), 0.0) + self.assertEqual(type(round(0.0)), float) # Will be int in 3.0. + self.assertEqual(round(1.0), 1.0) + self.assertEqual(round(10.0), 10.0) + self.assertEqual(round(1000000000.0), 1000000000.0) + self.assertEqual(round(1e20), 1e20) + + self.assertEqual(round(-1.0), -1.0) + self.assertEqual(round(-10.0), -10.0) + self.assertEqual(round(-1000000000.0), -1000000000.0) + self.assertEqual(round(-1e20), -1e20) + + self.assertEqual(round(0.1), 0.0) + self.assertEqual(round(1.1), 1.0) + self.assertEqual(round(10.1), 10.0) + self.assertEqual(round(1000000000.1), 1000000000.0) + + self.assertEqual(round(-1.1), -1.0) + self.assertEqual(round(-10.1), -10.0) + self.assertEqual(round(-1000000000.1), -1000000000.0) + + self.assertEqual(round(0.9), 1.0) + self.assertEqual(round(9.9), 10.0) + self.assertEqual(round(999999999.9), 1000000000.0) + + self.assertEqual(round(-0.9), -1.0) + self.assertEqual(round(-9.9), -10.0) + self.assertEqual(round(-999999999.9), -1000000000.0) + + self.assertEqual(round(-8.0, -1), -10.0) + self.assertEqual(type(round(-8.0, -1)), float) + + self.assertEqual(type(round(-8.0, 0)), float) + self.assertEqual(type(round(-8.0, 1)), float) + + # Check half rounding behaviour. + self.assertEqual(round(5.5), 6) + self.assertEqual(round(6.5), 7) + self.assertEqual(round(-5.5), -6) + self.assertEqual(round(-6.5), -7) + + # Check behavior on ints + self.assertEqual(round(0), 0) + self.assertEqual(round(8), 8) + self.assertEqual(round(-8), -8) + self.assertEqual(type(round(0)), float) # Will be int in 3.0. 
+ self.assertEqual(type(round(-8, -1)), float) + self.assertEqual(type(round(-8, 0)), float) + self.assertEqual(type(round(-8, 1)), float) + + # test new kwargs + self.assertEqual(round(number=-8.0, ndigits=-1), -10.0) + + self.assertRaises(TypeError, round) + + # test generic rounding delegation for reals + class TestRound(object): + def __float__(self): + return 23.0 + + class TestNoRound(object): + pass + + self.assertEqual(round(TestRound()), 23) + + self.assertRaises(TypeError, round, 1, 2, 3) + self.assertRaises(TypeError, round, TestNoRound()) + + t = TestNoRound() + t.__float__ = lambda *args: args + self.assertRaises(TypeError, round, t) + self.assertRaises(TypeError, round, t, 0) + + # Some versions of glibc for alpha have a bug that affects + # float -> integer rounding (floor, ceil, rint, round) for + # values in the range [2**52, 2**53). See: + # + # http://sources.redhat.com/bugzilla/show_bug.cgi?id=5350 + # + # We skip this test on Linux/alpha if it would fail. + linux_alpha = (platform.system().startswith('Linux') and + platform.machine().startswith('alpha')) + system_round_bug = round(5e15+1) != 5e15+1 + @unittest.skipIf(linux_alpha and system_round_bug, + "test will fail; failure is probably due to a " + "buggy system round function") + def test_round_large(self): + # Issue #1869: integral floats should remain unchanged + self.assertEqual(round(5e15-1), 5e15-1) + self.assertEqual(round(5e15), 5e15) + self.assertEqual(round(5e15+1), 5e15+1) + self.assertEqual(round(5e15+2), 5e15+2) + self.assertEqual(round(5e15+3), 5e15+3) + + def test_setattr(self): + setattr(sys, 'spam', 1) + self.assertEqual(sys.spam, 1) + self.assertRaises(TypeError, setattr, sys, 1, 'spam') + self.assertRaises(TypeError, setattr) + + def test_sum(self): + self.assertEqual(sum([]), 0) + self.assertEqual(sum(range(2,8)), 27) + self.assertEqual(sum(iter(range(2,8))), 27) + self.assertEqual(sum(Squares(10)), 285) + self.assertEqual(sum(iter(Squares(10))), 285) + 
self.assertEqual(sum([[1], [2], [3]], []), [1, 2, 3]) + + self.assertRaises(TypeError, sum) + self.assertRaises(TypeError, sum, 42) + self.assertRaises(TypeError, sum, ['a', 'b', 'c']) + self.assertRaises(TypeError, sum, ['a', 'b', 'c'], '') + self.assertRaises(TypeError, sum, [[1], [2], [3]]) + self.assertRaises(TypeError, sum, [{2:3}]) + self.assertRaises(TypeError, sum, [{2:3}]*2, {2:3}) + + class BadSeq: + def __getitem__(self, index): + raise ValueError + self.assertRaises(ValueError, sum, BadSeq()) + + empty = [] + sum(([x] for x in range(10)), empty) + self.assertEqual(empty, []) + + def test_type(self): + self.assertEqual(type(''), type('123')) + self.assertNotEqual(type(''), type(())) + + @expectedFailurePY3 + def test_unichr(self): + if True: # Was: if have_unicode: + self.assertEqual(unichr(32), unicode(' ')) + self.assertEqual(unichr(65), unicode('A')) + self.assertEqual(unichr(97), unicode('a')) + self.assertEqual( + unichr(sys.maxunicode), + unicode('\\U%08x' % (sys.maxunicode), 'unicode-escape') + ) + self.assertRaises(ValueError, unichr, sys.maxunicode+1) + self.assertRaises(TypeError, unichr) + self.assertRaises((OverflowError, ValueError), unichr, 2**32) + + # We don't want self in vars(), so these are static methods + + @staticmethod + def get_vars_f0(): + return vars() + + @staticmethod + def get_vars_f2(): + BuiltinTest.get_vars_f0() + a = 1 + b = 2 + return vars() + + class C_get_vars(object): + def getDict(self): + return {'a':2} + __dict__ = property(fget=getDict) + + def test_vars(self): + self.assertEqual(set(vars()), set(dir())) + import sys + self.assertEqual(set(vars(sys)), set(dir(sys))) + self.assertEqual(self.get_vars_f0(), {}) + self.assertEqual(self.get_vars_f2(), {'a': 1, 'b': 2}) + self.assertRaises(TypeError, vars, 42, 42) + self.assertRaises(TypeError, vars, 42) + self.assertEqual(vars(self.C_get_vars()), {'a':2}) + + def test_zip(self): + a = (1, 2, 3) + b = (4, 5, 6) + t = [(1, 4), (2, 5), (3, 6)] + self.assertEqual(zip(a, 
b), t) + b = [4, 5, 6] + self.assertEqual(zip(a, b), t) + b = (4, 5, 6, 7) + self.assertEqual(zip(a, b), t) + class I: + def __getitem__(self, i): + if i < 0 or i > 2: raise IndexError + return i + 4 + self.assertEqual(zip(a, I()), t) + self.assertEqual(zip(), []) + self.assertEqual(zip(*[]), []) + self.assertRaises(TypeError, zip, None) + class G: + pass + self.assertRaises(TypeError, zip, a, G()) + + # Make sure zip doesn't try to allocate a billion elements for the + # result list when one of its arguments doesn't say how long it is. + # A MemoryError is the most likely failure mode. + class SequenceWithoutALength: + def __getitem__(self, i): + if i == 5: + raise IndexError + else: + return i + self.assertEqual( + zip(SequenceWithoutALength(), xrange(2**30)), + list(enumerate(range(5))) + ) + + class BadSeq: + def __getitem__(self, i): + if i == 5: + raise ValueError + else: + return i + self.assertRaises(ValueError, zip, BadSeq(), BadSeq()) + + @skip26 + @expectedFailurePY3 + def test_format(self): + # Test the basic machinery of the format() builtin. Don't test + # the specifics of the various formatters + self.assertEqual(format(3, ''), '3') + + # Returns some classes to use for various tests. 
There's + # an old-style version, and a new-style version + def classes_new(): + class A(object): + def __init__(self, x): + self.x = x + def __format__(self, format_spec): + return str(self.x) + format_spec + class DerivedFromA(A): + pass + + class Simple(object): pass + class DerivedFromSimple(Simple): + def __init__(self, x): + self.x = x + def __format__(self, format_spec): + return str(self.x) + format_spec + class DerivedFromSimple2(DerivedFromSimple): pass + return A, DerivedFromA, DerivedFromSimple, DerivedFromSimple2 + + # In 3.0, classes_classic has the same meaning as classes_new + def classes_classic(): + class A: + def __init__(self, x): + self.x = x + def __format__(self, format_spec): + return str(self.x) + format_spec + class DerivedFromA(A): + pass + + class Simple: pass + class DerivedFromSimple(Simple): + def __init__(self, x): + self.x = x + def __format__(self, format_spec): + return str(self.x) + format_spec + class DerivedFromSimple2(DerivedFromSimple): pass + return A, DerivedFromA, DerivedFromSimple, DerivedFromSimple2 + + def class_test(A, DerivedFromA, DerivedFromSimple, DerivedFromSimple2): + self.assertEqual(format(A(3), 'spec'), '3spec') + self.assertEqual(format(DerivedFromA(4), 'spec'), '4spec') + self.assertEqual(format(DerivedFromSimple(5), 'abc'), '5abc') + self.assertEqual(format(DerivedFromSimple2(10), 'abcdef'), + '10abcdef') + + class_test(*classes_new()) + class_test(*classes_classic()) + + def empty_format_spec(value): + # test that: + # format(x, '') == str(x) + # format(x) == str(x) + self.assertEqual(format(value, ""), str(value)) + self.assertEqual(format(value), str(value)) + + # for builtin types, format(x, "") == str(x) + empty_format_spec(17**13) + empty_format_spec(1.0) + empty_format_spec(3.1415e104) + empty_format_spec(-3.1415e104) + empty_format_spec(3.1415e-104) + empty_format_spec(-3.1415e-104) + empty_format_spec(object) + empty_format_spec(None) + + # TypeError because self.__format__ returns the wrong type + 
class BadFormatResult: + def __format__(self, format_spec): + return 1.0 + self.assertRaises(TypeError, format, BadFormatResult(), "") + + # TypeError because format_spec is not unicode or str + self.assertRaises(TypeError, format, object(), 4) + self.assertRaises(TypeError, format, object(), object()) + + # tests for object.__format__ really belong elsewhere, but + # there's no good place to put them + x = object().__format__('') + self.assertTrue(x.startswith('= 4: + if should_raise_warning: + self.assertRaises(TypeError, format, obj, fmt_str) + else: + try: + format(obj, fmt_str) + except TypeError: + self.fail('object.__format__ raised TypeError unexpectedly') + else: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always", PendingDeprecationWarning) + format(obj, fmt_str) + if should_raise_warning: + self.assertEqual(len(w), 1) + self.assertIsInstance(w[0].message, PendingDeprecationWarning) + self.assertIn('object.__format__ with a non-empty format ' + 'string', str(w[0].message)) + else: + self.assertEqual(len(w), 0) + + fmt_strs = ['', 's', u'', u's'] + + class A: + def __format__(self, fmt_str): + return format('', fmt_str) + + for fmt_str in fmt_strs: + test_deprecated_format_string(A(), fmt_str, False) + + class B: + pass + + class C(object): + pass + + for cls in [object, B, C]: + for fmt_str in fmt_strs: + test_deprecated_format_string(cls(), fmt_str, len(fmt_str) != 0) + # -------------------------------------------------------------------- + + # make sure we can take a subclass of str as a format spec + class DerivedFromStr(str): pass + self.assertEqual(format(0, DerivedFromStr('10')), ' 0') + + def test_bin(self): + self.assertEqual(bin(0), '0b0') + self.assertEqual(bin(1), '0b1') + self.assertEqual(bin(-1), '-0b1') + self.assertEqual(bin(2**65), '0b1' + '0' * 65) + self.assertEqual(bin(2**65-1), '0b' + '1' * 65) + self.assertEqual(bin(-(2**65)), '-0b1' + '0' * 65) + self.assertEqual(bin(-(2**65-1)), '-0b' + '1' * 65) + + 
@expectedFailurePY3 + def test_bytearray_translate(self): + x = bytearray(b"abc") + self.assertRaises(ValueError, x.translate, "1", 1) + self.assertRaises(TypeError, x.translate, "1"*256, 1) + +class TestSorted(unittest.TestCase): + + @expectedFailurePY3 + def test_basic(self): + data = range(100) + copy = data[:] + random.shuffle(copy) + self.assertEqual(data, sorted(copy)) + self.assertNotEqual(data, copy) + + data.reverse() + random.shuffle(copy) + self.assertEqual(data, sorted(copy, cmp=lambda x, y: cmp(y,x))) + self.assertNotEqual(data, copy) + random.shuffle(copy) + self.assertEqual(data, sorted(copy, key=lambda x: -x)) + self.assertNotEqual(data, copy) + random.shuffle(copy) + self.assertEqual(data, sorted(copy, reverse=1)) + self.assertNotEqual(data, copy) + + def test_inputtypes(self): + s = 'abracadabra' + types = [list, tuple] + if True: # Was: if have_unicode: + types.insert(0, unicode) + for T in types: + self.assertEqual(sorted(s), sorted(T(s))) + + s = ''.join(dict.fromkeys(s).keys()) # unique letters only + types = [set, frozenset, list, tuple, dict.fromkeys] + if True: # Was: if have_unicode: + types.insert(0, unicode) + for T in types: + self.assertEqual(sorted(s), sorted(T(s))) + + def test_baddecorator(self): + data = 'The quick Brown fox Jumped over The lazy Dog'.split() + self.assertRaises(TypeError, sorted, data, None, lambda x,y: 0) + +# def _run_unittest(*args): +# # with check_py3k_warnings( +# # (".+ not supported in 3.x", DeprecationWarning), +# # (".+ is renamed to imp.reload", DeprecationWarning), +# # ("classic int division", DeprecationWarning)): +# if True: +# run_unittest(*args) +# +# def test_main(verbose=None): +# test_classes = (BuiltinTest, TestSorted) +# +# _run_unittest(*test_classes) +# +# # verify reference counting +# if verbose and hasattr(sys, "gettotalrefcount"): +# import gc +# counts = [None] * 5 +# for i in xrange(len(counts)): +# _run_unittest(*test_classes) +# gc.collect() +# counts[i] = sys.gettotalrefcount() +# 
print(counts) + + +if __name__ == "__main__": + # test_main(verbose=True) + unittest.main() diff --git a/tests/test_past/test_misc.py b/tests/test_past/test_misc.py new file mode 100644 index 00000000..0367b3db --- /dev/null +++ b/tests/test_past/test_misc.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +""" +Tests for the resurrected Py2-like cmp function +""" + +from __future__ import absolute_import, unicode_literals, print_function + +import os.path +import sys +import traceback +from contextlib import contextmanager + +from future.tests.base import unittest +from future.utils import PY3, PY26 + +if PY3: + from past.builtins import cmp + +_dir = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(_dir) +import test_values + + +@contextmanager +def empty_context_manager(*args, **kwargs): + yield dict(args=args, kwargs=kwargs) + + +class TestCmp(unittest.TestCase): + def test_cmp(self): + for x, y, cmp_python2_value in test_values.cmp_python2_value: + if PY26: + # set cmp works a bit differently in 2.6, we try to emulate 2.7 behavior, so skip set cmp tests + if isinstance(x, set) or isinstance(y, set): + continue + # to get this to run on python <3.4 which lacks subTest + with getattr(self, 'subTest', empty_context_manager)(x=x, y=y): + try: + past_cmp_value = cmp(x, y) + except Exception: + past_cmp_value = traceback.format_exc().strip().split('\n')[-1] + + self.assertEqual(cmp_python2_value, past_cmp_value, + "expected result matching python2 __builtins__.cmp({x!r},{y!r}) " + "== {cmp_python2_value} " + "got past.builtins.cmp({x!r},{y!r}) " + "== {past_cmp_value} " + "".format(x=x, y=y, past_cmp_value=past_cmp_value, + cmp_python2_value=cmp_python2_value)) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_past/test_noniterators.py b/tests/test_past/test_noniterators.py new file mode 100644 index 00000000..518109c2 --- /dev/null +++ b/tests/test_past/test_noniterators.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +""" +Tests for the 
Py2-like list-producing functions +""" + +from __future__ import absolute_import, unicode_literals, print_function +import os + +from past import utils +from future.tests.base import unittest +from past.builtins import filter, map, range, zip + + +class TestNonIterators(unittest.TestCase): + + def test_noniterators_produce_lists(self): + l = range(10) + self.assertTrue(isinstance(l, list)) + + l2 = zip(l, list('ABCDE')*2) + self.assertTrue(isinstance(l2, list)) + + double = lambda x: x*2 + l3 = map(double, l) + self.assertTrue(isinstance(l3, list)) + + is_odd = lambda x: x % 2 == 1 + l4 = filter(is_odd, range(10)) + self.assertEqual(l4, [1, 3, 5, 7, 9]) + self.assertTrue(isinstance(l4, list)) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_past/test_olddict.py b/tests/test_past/test_olddict.py new file mode 100644 index 00000000..9f210608 --- /dev/null +++ b/tests/test_past/test_olddict.py @@ -0,0 +1,791 @@ +# -*- coding: utf-8 -*- +""" +Tests for the resurrected Py2-like class:`dict` type. +""" + +from __future__ import absolute_import, unicode_literals, print_function +import os +import sys + +from future.utils import implements_iterator, PY3 +from future.tests.base import unittest, skip26 +from past.builtins import dict + + +class TestOldDict(unittest.TestCase): + def setUp(self): + self.d1 = dict({'C': 1, 'B': 2, 'A': 3}) + self.d2 = dict(key1='value1', key2='value2') + + def test_dict_empty(self): + """ + dict() -> {} + """ + self.assertEqual(dict(), {}) + + def test_dict_eq(self): + d = self.d1 + self.assertEqual(dict(d), d) + + def test_dict_keys(self): + """ + The keys, values and items methods should now return lists on + Python 3.x. 
+ """ + d = self.d1 + self.assertEqual(set(dict(d)), set(d)) + self.assertEqual(set(dict(d).keys()), set(d.keys())) + keys = dict(d).keys() + assert isinstance(keys, list) + key0 = keys[0] + + def test_dict_values(self): + d = self.d1 + self.assertEqual(set(dict(d).values()), set(d.values())) + values = dict(d).values() + assert isinstance(values, list) + val0 = values[0] + + def test_dict_items(self): + d = self.d1 + self.assertEqual(set(dict(d).items()), set(d.items())) + items = dict(d).items() + assert isinstance(items, list) + item0 = items[0] + + def test_isinstance_dict(self): + self.assertTrue(isinstance(self.d1, dict)) + + def test_dict_getitem(self): + d = dict({'C': 1, 'B': 2, 'A': 3}) + self.assertEqual(d['C'], 1) + self.assertEqual(d['B'], 2) + self.assertEqual(d['A'], 3) + with self.assertRaises(KeyError): + self.assertEqual(d['D']) + + def test_methods_produce_lists(self): + for d in (dict(self.d1), self.d2): + assert isinstance(d.keys(), list) + assert isinstance(d.values(), list) + assert isinstance(d.items(), list) + + @unittest.skipIf(sys.version_info[:2] == (2, 6), + 'set-like behaviour of dict methods is only available in Py2.7+') + def test_set_like_behaviour(self): + d1, d2 = self.d1, self.d2 + self.assertEqual(dict(d1).viewkeys() & dict(d2).viewkeys(), set()) + self.assertEqual(dict(d1).viewkeys() | dict(d2).viewkeys(), + set(['key1', 'key2', 'C', 'B', 'A'])) + self.assertTrue(isinstance(d1.viewvalues() | d2.viewkeys(), set)) + self.assertTrue(isinstance(d1.viewitems() | d2.viewitems(), set)) + + with self.assertRaises(TypeError): + d1.values() | d2.values() + d1.keys() | d2.keys() + d1.items() | d2.items() + + def test_braces_create_newdict_object(self): + """ + It would nice if the {} dict syntax could be coaxed + into producing our new dict objects somehow ... + """ + d = self.d1 + if False: # This doesn't work ... 
+ self.assertTrue(type(d) == dict) + + +# import UserDict +import random, string +import gc, weakref + + +class Py2DictTest(unittest.TestCase): + """ + These are Py2/3-compatible ports of the unit tests from Python 2.7's + tests/test_dict.py + """ + + def test_constructor(self): + # calling built-in types without argument must return empty + self.assertEqual(dict(), {}) + self.assertIsNot(dict(), {}) + + @skip26 + def test_literal_constructor(self): + # check literal constructor for different sized dicts + # (to exercise the BUILD_MAP oparg). + for n in (0, 1, 6, 256, 400): + items = [(''.join(random.sample(string.ascii_letters, 8)), i) + for i in range(n)] + random.shuffle(items) + formatted_items = ('{!r}: {:d}'.format(k, v) for k, v in items) + dictliteral = '{' + ', '.join(formatted_items) + '}' + self.assertEqual(eval(dictliteral), dict(items)) + + def test_bool(self): + self.assertIs(not dict(), True) + self.assertTrue(dict({1: 2})) + self.assertIs(bool(dict({})), False) + self.assertIs(bool(dict({1: 2})), True) + + def test_keys(self): + d = dict() + self.assertEqual(d.keys(), []) + d = dict({'a': 1, 'b': 2}) + k = d.keys() + self.assertTrue(d.has_key('a')) + self.assertTrue(d.has_key('b')) + + self.assertRaises(TypeError, d.keys, None) + + def test_values(self): + d = dict() + self.assertEqual(d.values(), []) + d = dict({1:2}) + self.assertEqual(d.values(), [2]) + + self.assertRaises(TypeError, d.values, None) + + def test_items(self): + d = dict() + self.assertEqual(d.items(), []) + + d = dict({1:2}) + self.assertEqual(d.items(), [(1, 2)]) + + self.assertRaises(TypeError, d.items, None) + + def test_has_key(self): + d = dict() + self.assertFalse(d.has_key('a')) + d = dict({'a': 1, 'b': 2}) + k = d.keys() + k.sort() + self.assertEqual(k, ['a', 'b']) + + self.assertRaises(TypeError, d.has_key) + + def test_contains(self): + d = dict() + self.assertNotIn('a', d) + self.assertFalse('a' in d) + self.assertTrue('a' not in d) + d = dict({'a': 1, 'b': 2}) + 
self.assertIn('a', d) + self.assertIn('b', d) + self.assertNotIn('c', d) + + self.assertRaises(TypeError, d.__contains__) + + def test_len(self): + d = dict() + self.assertEqual(len(d), 0) + d = dict({'a': 1, 'b': 2}) + self.assertEqual(len(d), 2) + + def test_getitem(self): + d = dict({'a': 1, 'b': 2}) + self.assertEqual(d['a'], 1) + self.assertEqual(d['b'], 2) + d['c'] = 3 + d['a'] = 4 + self.assertEqual(d['c'], 3) + self.assertEqual(d['a'], 4) + del d['b'] + self.assertEqual(d, dict({'a': 4, 'c': 3})) + + self.assertRaises(TypeError, d.__getitem__) + + class BadEq(object): + def __eq__(self, other): + raise Exc() + def __hash__(self): + return 24 + + d = dict() + d[BadEq()] = 42 + self.assertRaises(KeyError, d.__getitem__, 23) + + class Exc(Exception): pass + + class BadHash(object): + fail = False + def __hash__(self): + if self.fail: + raise Exc() + else: + return 42 + + x = BadHash() + d[x] = 42 + x.fail = True + self.assertRaises(Exc, d.__getitem__, x) + + def test_clear(self): + d = dict({1:1, 2:2, 3:3}) + d.clear() + self.assertEqual(d, {}) + + self.assertRaises(TypeError, d.clear, None) + + def test_update(self): + d = dict() + d.update({1:100}) + d.update(dict({2:20})) + d.update({1:1, 2:2, 3:3}) + self.assertEqual(d, {1:1, 2:2, 3:3}) + + d.update() + self.assertEqual(d, {1:1, 2:2, 3:3}) + + self.assertRaises((TypeError, AttributeError), d.update, None) + + class SimpleUserDict: + def __init__(self): + self.d = dict({1:1, 2:2, 3:3}) + def keys(self): + return self.d.keys() + def __getitem__(self, i): + return self.d[i] + d.clear() + d.update(SimpleUserDict()) + self.assertEqual(d, {1:1, 2:2, 3:3}) + + class Exc(Exception): pass + + d.clear() + class FailingUserDict: + def keys(self): + raise Exc + self.assertRaises(Exc, d.update, FailingUserDict()) + + class FailingUserDict: + def keys(self): + @implements_iterator + class BogonIter: + def __init__(self): + self.i = 1 + def __iter__(self): + return self + def __next__(self): + if self.i: + self.i = 0 + 
return 'a' + raise Exc + return BogonIter() + def __getitem__(self, key): + return key + self.assertRaises(Exc, d.update, FailingUserDict()) + + class FailingUserDict: + def keys(self): + @implements_iterator + class BogonIter: + def __init__(self): + self.i = ord('a') + def __iter__(self): + return self + def __next__(self): + if self.i <= ord('z'): + rtn = chr(self.i) + self.i += 1 + return rtn + raise StopIteration + return BogonIter() + def __getitem__(self, key): + raise Exc + self.assertRaises(Exc, d.update, FailingUserDict()) + + @implements_iterator + class badseq(object): + def __iter__(self): + return self + def __next__(self): + raise Exc() + + self.assertRaises(Exc, {}.update, badseq()) + + self.assertRaises(ValueError, {}.update, [(1, 2, 3)]) + + def test_fromkeys(self): + self.assertEqual(dict.fromkeys('abc'), {'a':None, 'b':None, 'c':None}) + d = dict() + self.assertIsNot(d.fromkeys('abc'), d) + self.assertEqual(d.fromkeys('abc'), {'a':None, 'b':None, 'c':None}) + self.assertEqual(d.fromkeys((4,5),0), {4:0, 5:0}) + self.assertEqual(d.fromkeys([]), {}) + def g(): + yield 1 + self.assertEqual(d.fromkeys(g()), {1:None}) + self.assertRaises(TypeError, dict().fromkeys, 3) + class dictlike(dict): pass + self.assertEqual(dictlike.fromkeys('a'), {'a':None}) + self.assertEqual(dictlike().fromkeys('a'), {'a':None}) + self.assertIsInstance(dictlike.fromkeys('a'), dictlike) + self.assertIsInstance(dictlike().fromkeys('a'), dictlike) + # class mydict(dict): + # def __new__(cls): + # return UserDict.UserDict() + # ud = mydict.fromkeys('ab') + # self.assertEqual(ud, {'a':None, 'b':None}) + # self.assertIsInstance(ud, UserDict.UserDict) + # self.assertRaises(TypeError, dict.fromkeys) + + class Exc(Exception): pass + + class baddict1(dict): + def __init__(self): + raise Exc() + + self.assertRaises(Exc, baddict1.fromkeys, [1]) + + @implements_iterator + class BadSeq(object): + def __iter__(self): + return self + def __next__(self): + raise Exc() + + 
self.assertRaises(Exc, dict.fromkeys, BadSeq()) + + class baddict2(dict): + def __setitem__(self, key, value): + raise Exc() + + self.assertRaises(Exc, baddict2.fromkeys, [1]) + + # test fast path for dictionary inputs + d = dict(zip(range(6), range(6))) + self.assertEqual(dict.fromkeys(d, 0), dict(zip(range(6), [0]*6))) + + class baddict3(dict): + def __new__(cls): + return d + d = dict((i, i) for i in range(10)) + res = d.copy() + res.update(a=None, b=None, c=None) + # Was: self.assertEqual(baddict3.fromkeys(set(["a", "b", "c"])), res) + # Infinite loop on Python 2.6 and 2.7 ... + + def test_copy(self): + d = dict({1:1, 2:2, 3:3}) + self.assertEqual(d.copy(), {1:1, 2:2, 3:3}) + self.assertEqual({}.copy(), {}) + self.assertRaises(TypeError, d.copy, None) + + def test_get(self): + d = dict() + self.assertIs(d.get('c'), None) + self.assertEqual(d.get('c', 3), 3) + d = dict({'a': 1, 'b': 2}) + self.assertIs(d.get('c'), None) + self.assertEqual(d.get('c', 3), 3) + self.assertEqual(d.get('a'), 1) + self.assertEqual(d.get('a', 3), 1) + self.assertRaises(TypeError, d.get) + self.assertRaises(TypeError, d.get, None, None, None) + + @skip26 + def test_setdefault(self): + # dict.setdefault() + d = dict() + self.assertIs(d.setdefault('key0'), None) + d.setdefault('key0', []) + self.assertIs(d.setdefault('key0'), None) + d.setdefault('key', []).append(3) + self.assertEqual(d['key'][0], 3) + d.setdefault('key', []).append(4) + self.assertEqual(len(d['key']), 2) + self.assertRaises(TypeError, d.setdefault) + + class Exc(Exception): pass + + class BadHash(object): + fail = False + def __hash__(self): + if self.fail: + raise Exc() + else: + return 42 + + x = BadHash() + d[x] = 42 + x.fail = True + self.assertRaises(Exc, d.setdefault, x, []) + + @skip26 + def test_setdefault_atomic(self): + # Issue #13521: setdefault() calls __hash__ and __eq__ only once. 
+ class Hashed(object): + def __init__(self): + self.hash_count = 0 + self.eq_count = 0 + def __hash__(self): + self.hash_count += 1 + return 42 + def __eq__(self, other): + self.eq_count += 1 + return id(self) == id(other) + hashed1 = Hashed() + y = dict({hashed1: 5}) + hashed2 = Hashed() + y.setdefault(hashed2, []) + self.assertEqual(hashed1.hash_count, 1) + if PY3: + self.assertEqual(hashed2.hash_count, 1) + self.assertEqual(hashed1.eq_count + hashed2.eq_count, 1) + + def test_popitem(self): + # dict.popitem() + for copymode in -1, +1: + # -1: b has same structure as a + # +1: b is a.copy() + for log2size in range(12): + size = 2**log2size + a = dict() + b = dict() + for i in range(size): + a[repr(i)] = i + if copymode < 0: + b[repr(i)] = i + if copymode > 0: + b = a.copy() + for i in range(size): + ka, va = ta = a.popitem() + self.assertEqual(va, int(ka)) + kb, vb = tb = b.popitem() + self.assertEqual(vb, int(kb)) + self.assertFalse(copymode < 0 and ta != tb) + self.assertFalse(a) + self.assertFalse(b) + + d = dict() + self.assertRaises(KeyError, d.popitem) + + def test_pop(self): + # Tests for pop with specified key + d = dict() + k, v = 'abc', 'def' + d[k] = v + self.assertRaises(KeyError, d.pop, 'ghi') + + self.assertEqual(d.pop(k), v) + self.assertEqual(len(d), 0) + + self.assertRaises(KeyError, d.pop, k) + + self.assertEqual(d.pop(k, v), v) + d[k] = v + self.assertEqual(d.pop(k, 1), v) + + self.assertRaises(TypeError, d.pop) + + class Exc(Exception): pass + + class BadHash(object): + fail = False + def __hash__(self): + if self.fail: + raise Exc() + else: + return 42 + + x = BadHash() + d[x] = 42 + x.fail = True + self.assertRaises(Exc, d.pop, x) + + def test_mutatingiteration(self): + # changing dict size during iteration + d = dict() + d[1] = 1 + with self.assertRaises(RuntimeError): + for i in d: + d[i+1] = 1 + + def test_repr(self): + d = dict() + self.assertEqual(repr(d), '{}') + d[1] = 2 + self.assertEqual(repr(d), '{1: 2}') + d = dict() + d[1] = d + 
self.assertEqual(repr(d), '{1: {...}}') + + class Exc(Exception): pass + + class BadRepr(object): + def __repr__(self): + raise Exc() + + d = dict({1: BadRepr()}) + self.assertRaises(Exc, repr, d) + + @unittest.skip('Comparing dicts for order has not been forward-ported') + def test_le(self): + self.assertFalse(dict() < {}) + self.assertFalse(dict() < dict()) + self.assertFalse(dict({1: 2}) < {1: 2}) + + class Exc(Exception): pass + + class BadCmp(object): + def __eq__(self, other): + raise Exc() + def __hash__(self): + return 42 + + d1 = dict({BadCmp(): 1}) + d2 = dict({1: 1}) + + with self.assertRaises(Exc): + d1 < d2 + + @skip26 + def test_missing(self): + # Make sure dict doesn't have a __missing__ method + self.assertFalse(hasattr(dict, "__missing__")) + self.assertFalse(hasattr(dict(), "__missing__")) + # Test several cases: + # (D) subclass defines __missing__ method returning a value + # (E) subclass defines __missing__ method raising RuntimeError + # (F) subclass sets __missing__ instance variable (no effect) + # (G) subclass doesn't define __missing__ at a all + class D(dict): + def __missing__(self, key): + return 42 + d = D({1: 2, 3: 4}) + self.assertEqual(d[1], 2) + self.assertEqual(d[3], 4) + self.assertNotIn(2, d) + self.assertNotIn(2, d.keys()) + self.assertEqual(d[2], 42) + + class E(dict): + def __missing__(self, key): + raise RuntimeError(key) + e = E() + with self.assertRaises(RuntimeError) as c: + e[42] + self.assertEqual(c.exception.args, (42,)) + + class F(dict): + def __init__(self): + # An instance variable __missing__ should have no effect + self.__missing__ = lambda key: None + f = F() + with self.assertRaises(KeyError) as c: + f[42] + self.assertEqual(c.exception.args, (42,)) + + class G(dict): + pass + g = G() + with self.assertRaises(KeyError) as c: + g[42] + self.assertEqual(c.exception.args, (42,)) + + @skip26 + def test_tuple_keyerror(self): + # SF #1576657 + d = dict() + with self.assertRaises(KeyError) as c: + d[(1,)] + 
self.assertEqual(c.exception.args, ((1,),)) + + # def test_bad_key(self): + # # Dictionary lookups should fail if __cmp__() raises an exception. + # class CustomException(Exception): + # pass + + # class BadDictKey: + # def __hash__(self): + # return hash(self.__class__) + + # def __cmp__(self, other): + # if isinstance(other, self.__class__): + # raise CustomException + # return other + + # d = dict() + # x1 = BadDictKey() + # x2 = BadDictKey() + # d[x1] = 1 + # for stmt in ['d[x2] = 2', + # 'z = d[x2]', + # 'x2 in d', + # 'd.has_key(x2)', + # 'd.get(x2)', + # 'd.setdefault(x2, 42)', + # 'd.pop(x2)', + # 'd.update({x2: 2})']: + # with self.assertRaises(CustomException): + # utils.exec_(stmt, locals()) + # + # def test_resize1(self): + # # Dict resizing bug, found by Jack Jansen in 2.2 CVS development. + # # This version got an assert failure in debug build, infinite loop in + # # release build. Unfortunately, provoking this kind of stuff requires + # # a mix of inserts and deletes hitting exactly the right hash codes in + # # exactly the right order, and I can't think of a randomized approach + # # that would be *likely* to hit a failing case in reasonable time. + + # d = {} + # for i in range(5): + # d[i] = i + # for i in range(5): + # del d[i] + # for i in range(5, 9): # i==8 was the problem + # d[i] = i + + # def test_resize2(self): + # # Another dict resizing bug (SF bug #1456209). + # # This caused Segmentation faults or Illegal instructions. 
+ + # class X(object): + # def __hash__(self): + # return 5 + # def __eq__(self, other): + # if resizing: + # d.clear() + # return False + # d = {} + # resizing = False + # d[X()] = 1 + # d[X()] = 2 + # d[X()] = 3 + # d[X()] = 4 + # d[X()] = 5 + # # now trigger a resize + # resizing = True + # d[9] = 6 + + # def test_empty_presized_dict_in_freelist(self): + # # Bug #3537: if an empty but presized dict with a size larger + # # than 7 was in the freelist, it triggered an assertion failure + # with self.assertRaises(ZeroDivisionError): + # d = {'a': 1 // 0, 'b': None, 'c': None, 'd': None, 'e': None, + # 'f': None, 'g': None, 'h': None} + # d = {} + + # def test_container_iterator(self): + # # Bug #3680: tp_traverse was not implemented for dictiter objects + # class C(object): + # pass + # iterators = (dict.iteritems, dict.itervalues, dict.iterkeys) + # for i in iterators: + # obj = C() + # ref = weakref.ref(obj) + # container = {obj: 1} + # obj.x = i(container) + # del obj, container + # gc.collect() + # self.assertIs(ref(), None, "Cycle was not collected") + + # def _not_tracked(self, t): + # # Nested containers can take several collections to untrack + # gc.collect() + # gc.collect() + # self.assertFalse(gc.is_tracked(t), t) + + # def _tracked(self, t): + # self.assertTrue(gc.is_tracked(t), t) + # gc.collect() + # gc.collect() + # self.assertTrue(gc.is_tracked(t), t) + + # @test_support.cpython_only + # def test_track_literals(self): + # # Test GC-optimization of dict literals + # x, y, z, w = 1.5, "a", (1, None), [] + + # self._not_tracked({}) + # self._not_tracked({x:(), y:x, z:1}) + # self._not_tracked({1: "a", "b": 2}) + # self._not_tracked({1: 2, (None, True, False, ()): int}) + # self._not_tracked({1: object()}) + + # # Dicts with mutable elements are always tracked, even if those + # # elements are not tracked right now. 
+ # self._tracked({1: []}) + # self._tracked({1: ([],)}) + # self._tracked({1: {}}) + # self._tracked({1: set()}) + + # @test_support.cpython_only + # def test_track_dynamic(self): + # # Test GC-optimization of dynamically-created dicts + # class MyObject(object): + # pass + # x, y, z, w, o = 1.5, "a", (1, object()), [], MyObject() + + # d = dict() + # self._not_tracked(d) + # d[1] = "a" + # self._not_tracked(d) + # d[y] = 2 + # self._not_tracked(d) + # d[z] = 3 + # self._not_tracked(d) + # self._not_tracked(d.copy()) + # d[4] = w + # self._tracked(d) + # self._tracked(d.copy()) + # d[4] = None + # self._not_tracked(d) + # self._not_tracked(d.copy()) + + # # dd isn't tracked right now, but it may mutate and therefore d + # # which contains it must be tracked. + # d = dict() + # dd = dict() + # d[1] = dd + # self._not_tracked(dd) + # self._tracked(d) + # dd[1] = d + # self._tracked(dd) + + # d = dict.fromkeys([x, y, z]) + # self._not_tracked(d) + # dd = dict() + # dd.update(d) + # self._not_tracked(dd) + # d = dict.fromkeys([x, y, z, o]) + # self._tracked(d) + # dd = dict() + # dd.update(d) + # self._tracked(dd) + + # d = dict(x=x, y=y, z=z) + # self._not_tracked(d) + # d = dict(x=x, y=y, z=z, w=w) + # self._tracked(d) + # d = dict() + # d.update(x=x, y=y, z=z) + # self._not_tracked(d) + # d.update(w=w) + # self._tracked(d) + + # d = dict([(x, y), (z, 1)]) + # self._not_tracked(d) + # d = dict([(x, y), (z, w)]) + # self._tracked(d) + # d = dict() + # d.update([(x, y), (z, 1)]) + # self._not_tracked(d) + # d.update([(x, y), (z, w)]) + # self._tracked(d) + + # @test_support.cpython_only + # def test_track_subtypes(self): + # # Dict subtypes are always tracked + # class MyDict(dict): + # pass + # self._tracked(MyDict()) + + +if __name__ == '__main__': + # Only run these tests on Python 3 ... 
+ if PY3: + unittest.main() diff --git a/tests/test_past/test_oldstr.py b/tests/test_past/test_oldstr.py new file mode 100644 index 00000000..17af03c5 --- /dev/null +++ b/tests/test_past/test_oldstr.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +""" +Tests for the resurrected Py2-like 8-bit string type. +""" + +from __future__ import absolute_import, unicode_literals, print_function + +from numbers import Integral +from future.tests.base import unittest +from past.builtins import str as oldstr +from past.types.oldstr import unescape + + +class TestOldStr(unittest.TestCase): + def test_repr(self): + s1 = oldstr(b'abc') + self.assertEqual(repr(s1), "'abc'") + s2 = oldstr(b'abc\ndef') + self.assertEqual(repr(s2), "'abc\\ndef'") + + def test_str(self): + s1 = oldstr(b'abc') + self.assertEqual(str(s1), 'abc') + s2 = oldstr(b'abc\ndef') + self.assertEqual(str(s2), 'abc\ndef') + + def test_unescape(self): + self.assertEqual(unescape('abc\\ndef'), 'abc\ndef') + s = unescape(r'a\\b\c\\d') # i.e. 'a\\\\b\\c\\\\d' + self.assertEqual(str(s), r'a\b\c\d') + s2 = unescape(r'abc\\ndef') # i.e. 'abc\\\\ndef' + self.assertEqual(str(s2), r'abc\ndef') + + def test_getitem(self): + s = oldstr(b'abc') + + self.assertNotEqual(s[0], 97) + self.assertEqual(s[0], b'a') + self.assertEqual(s[0], oldstr(b'a')) + + self.assertEqual(s[1:], b'bc') + self.assertEqual(s[1:], oldstr(b'bc')) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_past/test_translation.py b/tests/test_past/test_translation.py new file mode 100644 index 00000000..58d8d000 --- /dev/null +++ b/tests/test_past/test_translation.py @@ -0,0 +1,738 @@ +# -*- coding: utf-8 -*- +""" +Tests for the Py2-like class:`basestring` type. 
+""" + +from __future__ import absolute_import, division, print_function +import os +import textwrap +import sys +import tempfile +import io + +from future.tests.base import ( + expectedFailurePY3, + unittest, +) +from past.builtins import ( + str as oldstr, + unicode, +) +from past.translation import install_hooks, remove_hooks, common_substring + + +class TestTranslate(unittest.TestCase): + def setUp(self): + self.tempdir = tempfile.mkdtemp() + os.path.sep + + # def tearDown(self): + # remove_hooks() + + def test_common_substring(self): + s1 = '/home/user/anaconda/envs/future3/lib/python3.3/lib-dynload/math.cpython-33m.so' + s2 = '/home/user/anaconda/envs/future3/lib/python3.3/urllib/__init__.py' + c = '/home/user/anaconda/envs/future3/lib/python3.3' + self.assertEqual(c, common_substring(s1, s2)) + + s1 = r'/Users/Fred Flintstone/Python3.3/lib/something' + s2 = r'/Users/Fred Flintstone/Python3.3/lib/somethingelse' + c = r'/Users/Fred Flintstone/Python3.3/lib' + self.assertEqual(c, common_substring(s1, s2)) + + def write_and_import(self, code, modulename='mymodule'): + self.assertTrue('.py' not in modulename) + filename = modulename + '.py' + if isinstance(code, bytes): + code = code.decode('utf-8') + # Be explicit about encoding the temp file as UTF-8 (issue #63): + with io.open(self.tempdir + filename, 'w', encoding='utf-8') as f: + f.write(textwrap.dedent(code).strip() + '\n') + + # meta_path_len = len(sys.meta_path) + install_hooks(modulename) + # print('Hooks installed') + # assert len(sys.meta_path) == 1 + meta_path_len + # print('sys.meta_path is: {0}'.format(sys.meta_path)) + module = None + + sys.path.insert(0, self.tempdir) + try: + module = __import__(modulename) + except SyntaxError as e: + print('Import failed: %s' % e) + else: + print('Succeeded!') + finally: + remove_hooks() + # print('Hooks removed') + sys.path.remove(self.tempdir) + return module + + def test_print_statement(self): + code = """ + print 'Hello from a Python 2-style print 
statement!' + finished = True + """ + printer = self.write_and_import(code, 'printer') + self.assertTrue(printer.finished) + + def test_exec_statement(self): + code = """ + exec 'x = 5 + 2' + """ + module = self.write_and_import(code, 'execer') + self.assertEqual(module.x, 7) + + def test_div(self): + code = """ + x = 3 / 2 + """ + module = self.write_and_import(code, 'div') + self.assertEqual(module.x, 1) + + def test_import_future_standard_library(self): + """ + Does futurized Py3-like code like this work under autotranslation?? + """ + code = """ + from future import standard_library + standard_library.install_hooks() + import configparser + """ + module = self.write_and_import(code, 'future_standard_library') + self.assertTrue('configparser' in dir(module)) + from future import standard_library + standard_library.remove_hooks() + + def test_old_builtin_functions(self): + code = """ + # a = raw_input() + import sys + b = open(sys.executable, 'rb') + b.close() + + def is_even(x): + return x % 2 == 0 + c = filter(is_even, range(10)) + + def double(x): + return x * 2 + d = map(double, c) + + e = isinstance('abcd', str) + + for g in xrange(10**3): + pass + + # super(MyClass, self) + """ + module = self.write_and_import(code, 'test_builtin_functions') + self.assertTrue(hasattr(module.b, 'readlines')) + self.assertTrue(isinstance(module.c, list)) + self.assertEqual(module.c, [0, 2, 4, 6, 8]) + self.assertEqual(module.d, [0, 4, 8, 12, 16]) + self.assertTrue(module.e) + + @expectedFailurePY3 + def test_import_builtin_types(self): + code = """ + s1 = 'abcd' + s2 = u'abcd' + b1 = b'abcd' + b2 = s2.encode('utf-8') + d1 = {} + d2 = dict((i, i**2) for i in range(10)) + i1 = 1923482349324234L + i2 = 1923482349324234 + """ + module = self.write_and_import(code, 'test_builtin_types') + self.assertTrue(isinstance(module.s1, oldstr)) + self.assertTrue(isinstance(module.s2, unicode)) + self.assertTrue(isinstance(module.b1, oldstr)) + + def test_xrange(self): + code = ''' + total = 
0 + for i in xrange(10): + total += i + ''' + module = self.write_and_import(code, 'xrange') + self.assertEqual(module.total, 45) + + def test_exception_syntax(self): + """ + Test of whether futurize handles the old-style exception syntax + """ + code = """ + value = 'string' + try: + value += 10 + except TypeError, e: # old exception syntax + value += ': success!' + """ + module = self.write_and_import(code, 'py2_exceptions') + self.assertEqual(module.value, 'string: success!') + + +# class TestFuturizeSimple(CodeHandler): +# """ +# This class contains snippets of Python 2 code (invalid Python 3) and +# tests for whether they can be imported correctly from Python 3 with the +# import hooks. +# """ +# +# @unittest.expectedFailure +# def test_problematic_string(self): +# """ This string generates a SyntaxError on Python 3 unless it has +# an r prefix. +# """ +# before = r""" +# s = 'The folder is "C:\Users"'. +# """ +# after = r""" +# s = r'The folder is "C:\Users"'. +# """ +# self.convert_check(before, after) +# +# def test_tobytes(self): +# """ +# The --tobytes option converts all UNADORNED string literals 'abcd' to b'abcd'. +# It does apply to multi-line strings but doesn't apply if it's a raw +# string, because ur'abcd' is a SyntaxError on Python 2 and br'abcd' is a +# SyntaxError on Python 3. 
+# """ +# before = r""" +# s0 = '1234' +# s1 = '''5678 +# ''' +# s2 = "9abc" +# # Unchanged: +# s3 = r'1234' +# s4 = R"defg" +# s5 = u'hijk' +# s6 = u"lmno" +# s7 = b'lmno' +# s8 = b"pqrs" +# """ +# after = r""" +# s0 = b'1234' +# s1 = b'''5678 +# ''' +# s2 = b"9abc" +# # Unchanged: +# s3 = r'1234' +# s4 = R"defg" +# s5 = u'hijk' +# s6 = u"lmno" +# s7 = b'lmno' +# s8 = b"pqrs" +# """ +# self.convert_check(before, after, tobytes=True) +# +# @unittest.expectedFailure +# def test_izip(self): +# before = """ +# from itertools import izip +# for (a, b) in izip([1, 3, 5], [2, 4, 6]): +# pass +# """ +# after = """ +# from __future__ import unicode_literals +# from future.builtins import zip +# for (a, b) in zip([1, 3, 5], [2, 4, 6]): +# pass +# """ +# self.convert_check(before, after, stages=(1, 2), ignore_imports=False) +# +# @unittest.expectedFailure +# def test_no_unneeded_list_calls(self): +# """ +# TODO: get this working +# """ +# code = """ +# for (a, b) in zip(range(3), range(3, 6)): +# pass +# """ +# self.unchanged(code) +# +# def test_xrange(self): +# code = ''' +# for i in xrange(10): +# pass +# ''' +# self.convert(code) +# +# @unittest.expectedFailure +# def test_source_coding_utf8(self): +# """ +# Tests to ensure that the source coding line is not corrupted or +# removed. It must be left as the first line in the file (including +# before any __future__ imports). Also tests whether the unicode +# characters in this encoding are parsed correctly and left alone. 
+# """ +# code = """ +# # -*- coding: utf-8 -*- +# icons = [u"◐", u"◓", u"◑", u"◒"] +# """ +# self.unchanged(code) +# +# def test_exception_syntax(self): +# """ +# Test of whether futurize handles the old-style exception syntax +# """ +# before = """ +# try: +# pass +# except IOError, e: +# val = e.errno +# """ +# after = """ +# try: +# pass +# except IOError as e: +# val = e.errno +# """ +# self.convert_check(before, after) +# +# def test_super(self): +# """ +# This tests whether futurize keeps the old two-argument super() calls the +# same as before. It should, because this still works in Py3. +# """ +# code = ''' +# class VerboseList(list): +# def append(self, item): +# print('Adding an item') +# super(VerboseList, self).append(item) +# ''' +# self.unchanged(code) +# +# @unittest.expectedFailure +# def test_file(self): +# """ +# file() as a synonym for open() is obsolete and invalid on Python 3. +# """ +# before = ''' +# f = file(__file__) +# data = f.read() +# f.close() +# ''' +# after = ''' +# f = open(__file__) +# data = f.read() +# f.close() +# ''' +# self.convert_check(before, after) +# +# def test_apply(self): +# before = ''' +# def addup(*x): +# return sum(x) +# +# assert apply(addup, (10,20)) == 30 +# ''' +# after = """ +# def addup(*x): +# return sum(x) +# +# assert addup(*(10,20)) == 30 +# """ +# self.convert_check(before, after) +# +# @unittest.skip('not implemented yet') +# def test_download_pypi_package_and_test(self, package_name='future'): +# URL = 'http://pypi.python.org/pypi/{0}/json' +# +# import requests +# r = requests.get(URL.format(package_name)) +# pprint.pprint(r.json()) +# +# download_url = r.json()['urls'][0]['url'] +# filename = r.json()['urls'][0]['filename'] +# # r2 = requests.get(download_url) +# # with open('/tmp/' + filename, 'w') as tarball: +# # tarball.write(r2.content) +# +# def test_raw_input(self): +# """ +# Passes in a string to the waiting input() after futurize +# conversion. 
+# +# The code is the first snippet from these docs: +# http://docs.python.org/2/library/2to3.html +# """ +# before = """ +# def greet(name): +# print "Hello, {0}!".format(name) +# print "What's your name?" +# name = raw_input() +# greet(name) +# """ +# desired = """ +# def greet(name): +# print("Hello, {0}!".format(name)) +# print("What's your name?") +# name = input() +# greet(name) +# """ +# self.convert_check(before, desired, run=False) +# +# for interpreter in self.interpreters: +# p1 = Popen([interpreter, self.tempdir + 'mytestscript.py'], +# stdout=PIPE, stdin=PIPE, stderr=PIPE) +# (stdout, stderr) = p1.communicate(b'Ed') +# self.assertEqual(stdout, b"What's your name?\nHello, Ed!\n") +# +# def test_literal_prefixes_are_not_stripped(self): +# """ +# Tests to ensure that the u'' and b'' prefixes on unicode strings and +# byte strings are not removed by the futurize script. Removing the +# prefixes on Py3.3+ is unnecessary and loses some information -- namely, +# that the strings have explicitly been marked as unicode or bytes, +# rather than just e.g. a guess by some automated tool about what they +# are. +# """ +# code = ''' +# s = u'unicode string' +# b = b'byte string' +# ''' +# self.unchanged(code) +# +# @unittest.expectedFailure +# def test_division(self): +# """ +# TODO: implement this! 
+# """ +# before = """ +# x = 1 / 2 +# """ +# after = """ +# from future.utils import old_div +# x = old_div(1, 2) +# """ +# self.convert_check(before, after, stages=[1]) +# +# +# class TestFuturizeRenamedStdlib(CodeHandler): +# def test_renamed_modules(self): +# before = """ +# import ConfigParser +# import copy_reg +# import cPickle +# import cStringIO +# +# s = cStringIO.StringIO('blah') +# """ +# after = """ +# import configparser +# import copyreg +# import pickle +# import io +# +# s = io.StringIO('blah') +# """ +# self.convert_check(before, after) +# +# @unittest.expectedFailure +# def test_urllib_refactor(self): +# # Code like this using urllib is refactored by futurize --stage2 to use +# # the new Py3 module names, but ``future`` doesn't support urllib yet. +# before = """ +# import urllib +# +# URL = 'http://pypi.python.org/pypi/future/json' +# package_name = 'future' +# r = urllib.urlopen(URL.format(package_name)) +# data = r.read() +# """ +# after = """ +# import urllib.request +# +# URL = 'http://pypi.python.org/pypi/future/json' +# package_name = 'future' +# r = urllib.request.urlopen(URL.format(package_name)) +# data = r.read() +# """ +# self.convert_check(before, after) +# +# def test_renamed_copy_reg_and_cPickle_modules(self): +# """ +# Example from docs.python.org/2/library/copy_reg.html +# """ +# before = """ +# import copy_reg +# import copy +# import cPickle +# class C(object): +# def __init__(self, a): +# self.a = a +# +# def pickle_c(c): +# print('pickling a C instance...') +# return C, (c.a,) +# +# copy_reg.pickle(C, pickle_c) +# c = C(1) +# d = copy.copy(c) +# p = cPickle.dumps(c) +# """ +# after = """ +# import copyreg +# import copy +# import pickle +# class C(object): +# def __init__(self, a): +# self.a = a +# +# def pickle_c(c): +# print('pickling a C instance...') +# return C, (c.a,) +# +# copyreg.pickle(C, pickle_c) +# c = C(1) +# d = copy.copy(c) +# p = pickle.dumps(c) +# """ +# self.convert_check(before, after) +# +# 
@unittest.expectedFailure +# def test_Py2_StringIO_module(self): +# """ +# Ideally, there would be a fixer for this. For now: +# +# TODO: add the Py3 equivalent for this to the docs +# """ +# before = """ +# import cStringIO +# s = cStringIO.StringIO('my string') +# assert isinstance(s, cStringIO.InputType) +# """ +# after = """ +# import io +# s = io.StringIO('my string') +# # assert isinstance(s, io.InputType) +# # There is no io.InputType in Python 3. What should we change this to +# # instead? +# """ +# self.convert_check(before, after) +# +# +# class TestFuturizeStage1(CodeHandler): +# # """ +# # Tests "stage 1": safe optimizations: modernizing Python 2 code so that it +# # uses print functions, new-style exception syntax, etc. +# +# # The behaviour should not change and this should introduce no dependency on +# # the ``future`` package. It produces more modern Python 2-only code. The +# # goal is to reduce the size of the real porting patch-set by performing +# # the uncontroversial patches first. +# # """ +# +# def test_apply(self): +# """ +# apply() should be changed by futurize --stage1 +# """ +# before = ''' +# def f(a, b): +# return a + b +# +# args = (1, 2) +# assert apply(f, args) == 3 +# assert apply(f, ('a', 'b')) == 'ab' +# ''' +# after = ''' +# def f(a, b): +# return a + b +# +# args = (1, 2) +# assert f(*args) == 3 +# assert f(*('a', 'b')) == 'ab' +# ''' +# self.convert_check(before, after, stages=[1]) +# +# def test_xrange(self): +# """ +# xrange should not be changed by futurize --stage1 +# """ +# code = ''' +# for i in xrange(10): +# pass +# ''' +# self.unchanged(code, stages=[1]) +# +# @unittest.expectedFailure +# def test_absolute_import_changes(self): +# """ +# Implicit relative imports should be converted to absolute or explicit +# relative imports correctly. 
+# +# Issue #16 (with porting bokeh/bbmodel.py) +# """ +# with open('specialmodels.py', 'w') as f: +# f.write('pass') +# +# before = """ +# import specialmodels.pandasmodel +# specialmodels.pandasmodel.blah() +# """ +# after = """ +# from __future__ import absolute_import +# from .specialmodels import pandasmodel +# pandasmodel.blah() +# """ +# self.convert_check(before, after, stages=[1]) +# +# def test_safe_futurize_imports(self): +# """ +# The standard library module names should not be changed until stage 2 +# """ +# before = """ +# import ConfigParser +# import HTMLParser +# import collections +# +# ConfigParser.ConfigParser +# HTMLParser.HTMLParser +# d = collections.OrderedDict() +# """ +# self.unchanged(before, stages=[1]) +# +# def test_print(self): +# before = """ +# print 'Hello' +# """ +# after = """ +# print('Hello') +# """ +# self.convert_check(before, after, stages=[1]) +# +# before = """ +# import sys +# print >> sys.stderr, 'Hello', 'world' +# """ +# after = """ +# import sys +# print('Hello', 'world', file=sys.stderr) +# """ +# self.convert_check(before, after, stages=[1]) +# +# def test_print_already_function(self): +# """ +# Running futurize --stage1 should not add a second set of parentheses +# """ +# before = """ +# print('Hello') +# """ +# self.unchanged(before, stages=[1]) +# +# @unittest.expectedFailure +# def test_print_already_function_complex(self): +# """ +# Running futurize --stage1 does add a second second set of parentheses +# in this case. This is because the underlying lib2to3 has two distinct +# grammars -- with a print statement and with a print function -- and, +# when going forwards (2 to both), futurize assumes print is a statement, +# which raises a ParseError. 
+# """ +# before = """ +# import sys +# print('Hello', 'world', file=sys.stderr) +# """ +# self.unchanged(before, stages=[1]) +# +# def test_exceptions(self): +# before = """ +# try: +# raise AttributeError('blah') +# except AttributeError, e: +# pass +# """ +# after = """ +# try: +# raise AttributeError('blah') +# except AttributeError as e: +# pass +# """ +# self.convert_check(before, after, stages=[1]) +# +# @unittest.expectedFailure +# def test_string_exceptions(self): +# """ +# 2to3 does not convert string exceptions: see +# http://python3porting.com/differences.html. +# """ +# before = """ +# try: +# raise "old string exception" +# except Exception, e: +# pass +# """ +# after = """ +# try: +# raise Exception("old string exception") +# except Exception as e: +# pass +# """ +# self.convert_check(before, after, stages=[1]) +# +# @unittest.expectedFailure +# def test_oldstyle_classes(self): +# """ +# We don't convert old-style classes to new-style automatically. Should we? +# """ +# before = """ +# class Blah: +# pass +# """ +# after = """ +# class Blah(object): +# pass +# """ +# self.convert_check(before, after, stages=[1]) +# +# +# def test_octal_literals(self): +# before = """ +# mode = 0644 +# """ +# after = """ +# mode = 0o644 +# """ +# self.convert_check(before, after) +# +# def test_long_int_literals(self): +# before = """ +# bignumber = 12345678901234567890L +# """ +# after = """ +# bignumber = 12345678901234567890 +# """ +# self.convert_check(before, after) +# +# def test___future___import_position(self): +# """ +# Issue #4: __future__ imports inserted too low in file: SyntaxError +# """ +# code = """ +# # Comments here +# # and here +# __version__=''' $Id$ ''' +# __doc__="A Sequencer class counts things. It aids numbering and formatting lists." 
+# __all__='Sequencer getSequencer setSequencer'.split() +# # +# # another comment +# # +# +# CONSTANTS = [ 0, 01, 011, 0111, 012, 02, 021, 0211, 02111, 013 ] +# _RN_LETTERS = "IVXLCDM" +# +# def my_func(value): +# pass +# +# ''' Docstring-like comment here ''' +# """ +# self.convert(code) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_past/test_values.py b/tests/test_past/test_values.py new file mode 100644 index 00000000..11872084 --- /dev/null +++ b/tests/test_past/test_values.py @@ -0,0 +1,225 @@ +from math import pi + +inf, nan = float('inf'), float('nan') +test_values = [ + 0, 1, 2, -1, -9999999999, 9999999, + 0.0, inf, pi, + [], [[]], [1, 2, 3], + set(), set([1, 2, 3]), + " ", "", "1", "dsada saA.", "2", "dsa", b"", b"dsa", b" ", + {5: 3}, dict(), dict(a=99), dict(a=1, b=2, c=3), None +] + +# cmp_python2_values are pre-calculated from running cmp under python2 first values are x and y, last is results of cmp +cmp_python2_value = [[0, 1, -1], [0, 2, -1], [0, -1, 1], [0, -9999999999999999, 1], [0, 9999999999999999, -1], + [0, 0.0, 0], [0, inf, -1], [0, 3.141592653589793, -1], [0, [], -1], [0, [[]], -1], + [0, [1, 2, 3], -1], [0, '', -1], [0, ' ', -1], [0, '1', -1], [0, 'a bee cd.', -1], [0, '', -1], + [0, ' ', -1], [0, '1', -1], [0, 'a bee cd.', -1], [0, set([]), -1], [0, set([1, 2, 3]), -1], + [0, {5: 3}, -1], [0, {}, -1], [0, {'a': 99}, -1], [0, {'a': 1, 'c': 3, 'b': 2}, -1], + [0, {'a': 99, 'c': 3, 'b': 5}, -1], [0, None, 1], [1, 0, 1], [1, 2, -1], [1, -1, 1], + [1, -9999999999999999, 1], [1, 9999999999999999, -1], [1, 0.0, 1], [1, inf, -1], + [1, 3.141592653589793, -1], [1, [], -1], [1, [[]], -1], [1, [1, 2, 3], -1], [1, '', -1], + [1, ' ', -1], [1, '1', -1], [1, 'a bee cd.', -1], [1, '', -1], [1, ' ', -1], [1, '1', -1], + [1, 'a bee cd.', -1], [1, set([]), -1], [1, set([1, 2, 3]), -1], [1, {5: 3}, -1], [1, {}, -1], + [1, {'a': 99}, -1], [1, {'a': 1, 'c': 3, 'b': 2}, -1], [1, {'a': 99, 'c': 3, 'b': 5}, -1], + [1, None, 1], [2, 0, 
1], [2, 1, 1], [2, -1, 1], [2, -9999999999999999, 1], + [2, 9999999999999999, -1], [2, 0.0, 1], [2, inf, -1], [2, 3.141592653589793, -1], [2, [], -1], + [2, [[]], -1], [2, [1, 2, 3], -1], [2, '', -1], [2, ' ', -1], [2, '1', -1], [2, 'a bee cd.', -1], + [2, '', -1], [2, ' ', -1], [2, '1', -1], [2, 'a bee cd.', -1], [2, set([]), -1], + [2, set([1, 2, 3]), -1], [2, {5: 3}, -1], [2, {}, -1], [2, {'a': 99}, -1], + [2, {'a': 1, 'c': 3, 'b': 2}, -1], [2, {'a': 99, 'c': 3, 'b': 5}, -1], [2, None, 1], [-1, 0, -1], + [-1, 1, -1], [-1, 2, -1], [-1, -9999999999999999, 1], [-1, 9999999999999999, -1], [-1, 0.0, -1], + [-1, inf, -1], [-1, 3.141592653589793, -1], [-1, [], -1], [-1, [[]], -1], [-1, [1, 2, 3], -1], + [-1, '', -1], [-1, ' ', -1], [-1, '1', -1], [-1, 'a bee cd.', -1], [-1, '', -1], [-1, ' ', -1], + [-1, '1', -1], [-1, 'a bee cd.', -1], [-1, set([]), -1], [-1, set([1, 2, 3]), -1], + [-1, {5: 3}, -1], [-1, {}, -1], [-1, {'a': 99}, -1], [-1, {'a': 1, 'c': 3, 'b': 2}, -1], + [-1, {'a': 99, 'c': 3, 'b': 5}, -1], [-1, None, 1], [-9999999999999999, 0, -1], + [-9999999999999999, 1, -1], [-9999999999999999, 2, -1], [-9999999999999999, -1, -1], + [-9999999999999999, 9999999999999999, -1], [-9999999999999999, 0.0, -1], + [-9999999999999999, inf, -1], [-9999999999999999, 3.141592653589793, -1], + [-9999999999999999, [], -1], [-9999999999999999, [[]], -1], [-9999999999999999, [1, 2, 3], -1], + [-9999999999999999, '', -1], [-9999999999999999, ' ', -1], [-9999999999999999, '1', -1], + [-9999999999999999, 'a bee cd.', -1], [-9999999999999999, '', -1], [-9999999999999999, ' ', -1], + [-9999999999999999, '1', -1], [-9999999999999999, 'a bee cd.', -1], + [-9999999999999999, set([]), -1], [-9999999999999999, set([1, 2, 3]), -1], + [-9999999999999999, {5: 3}, -1], [-9999999999999999, {}, -1], [-9999999999999999, {'a': 99}, -1], + [-9999999999999999, {'a': 1, 'c': 3, 'b': 2}, -1], + [-9999999999999999, {'a': 99, 'c': 3, 'b': 5}, -1], [-9999999999999999, None, 1], + [9999999999999999, 0, 
1], [9999999999999999, 1, 1], [9999999999999999, 2, 1], + [9999999999999999, -1, 1], [9999999999999999, -9999999999999999, 1], [9999999999999999, 0.0, 1], + [9999999999999999, inf, -1], [9999999999999999, 3.141592653589793, 1], [9999999999999999, [], -1], + [9999999999999999, [[]], -1], [9999999999999999, [1, 2, 3], -1], [9999999999999999, '', -1], + [9999999999999999, ' ', -1], [9999999999999999, '1', -1], [9999999999999999, 'a bee cd.', -1], + [9999999999999999, '', -1], [9999999999999999, ' ', -1], [9999999999999999, '1', -1], + [9999999999999999, 'a bee cd.', -1], [9999999999999999, set([]), -1], + [9999999999999999, set([1, 2, 3]), -1], [9999999999999999, {5: 3}, -1], [9999999999999999, {}, -1], + [9999999999999999, {'a': 99}, -1], [9999999999999999, {'a': 1, 'c': 3, 'b': 2}, -1], + [9999999999999999, {'a': 99, 'c': 3, 'b': 5}, -1], [9999999999999999, None, 1], [0.0, 0, 0], + [0.0, 1, -1], [0.0, 2, -1], [0.0, -1, 1], [0.0, -9999999999999999, 1], [0.0, 9999999999999999, -1], + [0.0, inf, -1], [0.0, 3.141592653589793, -1], [0.0, [], -1], [0.0, [[]], -1], [0.0, [1, 2, 3], -1], + [0.0, '', -1], [0.0, ' ', -1], [0.0, '1', -1], [0.0, 'a bee cd.', -1], [0.0, '', -1], + [0.0, ' ', -1], [0.0, '1', -1], [0.0, 'a bee cd.', -1], [0.0, set([]), -1], + [0.0, set([1, 2, 3]), -1], [0.0, {5: 3}, -1], [0.0, {}, -1], [0.0, {'a': 99}, -1], + [0.0, {'a': 1, 'c': 3, 'b': 2}, -1], [0.0, {'a': 99, 'c': 3, 'b': 5}, -1], [0.0, None, 1], + [inf, 0, 1], [inf, 1, 1], [inf, 2, 1], [inf, -1, 1], [inf, -9999999999999999, 1], + [inf, 9999999999999999, 1], [inf, 0.0, 1], [inf, 3.141592653589793, 1], [inf, [], -1], + [inf, [[]], -1], [inf, [1, 2, 3], -1], [inf, '', -1], [inf, ' ', -1], [inf, '1', -1], + [inf, 'a bee cd.', -1], [inf, '', -1], [inf, ' ', -1], [inf, '1', -1], [inf, 'a bee cd.', -1], + [inf, set([]), -1], [inf, set([1, 2, 3]), -1], [inf, {5: 3}, -1], [inf, {}, -1], + [inf, {'a': 99}, -1], [inf, {'a': 1, 'c': 3, 'b': 2}, -1], [inf, {'a': 99, 'c': 3, 'b': 5}, -1], + [inf, None, 1], 
[3.141592653589793, 0, 1], [3.141592653589793, 1, 1], [3.141592653589793, 2, 1], + [3.141592653589793, -1, 1], [3.141592653589793, -9999999999999999, 1], + [3.141592653589793, 9999999999999999, -1], [3.141592653589793, 0.0, 1], + [3.141592653589793, inf, -1], [3.141592653589793, [], -1], [3.141592653589793, [[]], -1], + [3.141592653589793, [1, 2, 3], -1], [3.141592653589793, '', -1], [3.141592653589793, ' ', -1], + [3.141592653589793, '1', -1], [3.141592653589793, 'a bee cd.', -1], [3.141592653589793, '', -1], + [3.141592653589793, ' ', -1], [3.141592653589793, '1', -1], [3.141592653589793, 'a bee cd.', -1], + [3.141592653589793, set([]), -1], [3.141592653589793, set([1, 2, 3]), -1], + [3.141592653589793, {5: 3}, -1], [3.141592653589793, {}, -1], [3.141592653589793, {'a': 99}, -1], + [3.141592653589793, {'a': 1, 'c': 3, 'b': 2}, -1], + [3.141592653589793, {'a': 99, 'c': 3, 'b': 5}, -1], [3.141592653589793, None, 1], [[], 0, 1], + [[], 1, 1], [[], 2, 1], [[], -1, 1], [[], -9999999999999999, 1], [[], 9999999999999999, 1], + [[], 0.0, 1], [[], inf, 1], [[], 3.141592653589793, 1], [[], [[]], -1], [[], [1, 2, 3], -1], + [[], '', -1], [[], ' ', -1], [[], '1', -1], [[], 'a bee cd.', -1], [[], '', -1], [[], ' ', -1], + [[], '1', -1], [[], 'a bee cd.', -1], [[], set([]), -1], [[], set([1, 2, 3]), -1], [[], {5: 3}, 1], + [[], {}, 1], [[], {'a': 99}, 1], [[], {'a': 1, 'c': 3, 'b': 2}, 1], + [[], {'a': 99, 'c': 3, 'b': 5}, 1], [[], None, 1], [[[]], 0, 1], [[[]], 1, 1], [[[]], 2, 1], + [[[]], -1, 1], [[[]], -9999999999999999, 1], [[[]], 9999999999999999, 1], [[[]], 0.0, 1], + [[[]], inf, 1], [[[]], 3.141592653589793, 1], [[[]], [], 1], [[[]], [1, 2, 3], 1], [[[]], '', -1], + [[[]], ' ', -1], [[[]], '1', -1], [[[]], 'a bee cd.', -1], [[[]], '', -1], [[[]], ' ', -1], + [[[]], '1', -1], [[[]], 'a bee cd.', -1], [[[]], set([]), -1], [[[]], set([1, 2, 3]), -1], + [[[]], {5: 3}, 1], [[[]], {}, 1], [[[]], {'a': 99}, 1], [[[]], {'a': 1, 'c': 3, 'b': 2}, 1], + [[[]], {'a': 99, 'c': 3, 
'b': 5}, 1], [[[]], None, 1], [[1, 2, 3], 0, 1], [[1, 2, 3], 1, 1], + [[1, 2, 3], 2, 1], [[1, 2, 3], -1, 1], [[1, 2, 3], -9999999999999999, 1], + [[1, 2, 3], 9999999999999999, 1], [[1, 2, 3], 0.0, 1], [[1, 2, 3], inf, 1], + [[1, 2, 3], 3.141592653589793, 1], [[1, 2, 3], [], 1], [[1, 2, 3], [[]], -1], [[1, 2, 3], '', -1], + [[1, 2, 3], ' ', -1], [[1, 2, 3], '1', -1], [[1, 2, 3], 'a bee cd.', -1], [[1, 2, 3], '', -1], + [[1, 2, 3], ' ', -1], [[1, 2, 3], '1', -1], [[1, 2, 3], 'a bee cd.', -1], [[1, 2, 3], set([]), -1], + [[1, 2, 3], set([1, 2, 3]), -1], [[1, 2, 3], {5: 3}, 1], [[1, 2, 3], {}, 1], + [[1, 2, 3], {'a': 99}, 1], [[1, 2, 3], {'a': 1, 'c': 3, 'b': 2}, 1], + [[1, 2, 3], {'a': 99, 'c': 3, 'b': 5}, 1], [[1, 2, 3], None, 1], ['', 0, 1], ['', 1, 1], + ['', 2, 1], ['', -1, 1], ['', -9999999999999999, 1], ['', 9999999999999999, 1], ['', 0.0, 1], + ['', inf, 1], ['', 3.141592653589793, 1], ['', [], 1], ['', [[]], 1], ['', [1, 2, 3], 1], + ['', ' ', -1], ['', '1', -1], ['', 'a bee cd.', -1], ['', '', 0], ['', ' ', -1], ['', '1', -1], + ['', 'a bee cd.', -1], ['', set([]), 1], ['', set([1, 2, 3]), 1], ['', {5: 3}, 1], ['', {}, 1], + ['', {'a': 99}, 1], ['', {'a': 1, 'c': 3, 'b': 2}, 1], ['', {'a': 99, 'c': 3, 'b': 5}, 1], + ['', None, 1], [' ', 0, 1], [' ', 1, 1], [' ', 2, 1], [' ', -1, 1], [' ', -9999999999999999, 1], + [' ', 9999999999999999, 1], [' ', 0.0, 1], [' ', inf, 1], [' ', 3.141592653589793, 1], + [' ', [], 1], [' ', [[]], 1], [' ', [1, 2, 3], 1], [' ', '', 1], [' ', '1', -1], + [' ', 'a bee cd.', -1], [' ', '', 1], [' ', ' ', 0], [' ', '1', -1], [' ', 'a bee cd.', -1], + [' ', set([]), 1], [' ', set([1, 2, 3]), 1], [' ', {5: 3}, 1], [' ', {}, 1], [' ', {'a': 99}, 1], + [' ', {'a': 1, 'c': 3, 'b': 2}, 1], [' ', {'a': 99, 'c': 3, 'b': 5}, 1], [' ', None, 1], + ['1', 0, 1], ['1', 1, 1], ['1', 2, 1], ['1', -1, 1], ['1', -9999999999999999, 1], + ['1', 9999999999999999, 1], ['1', 0.0, 1], ['1', inf, 1], ['1', 3.141592653589793, 1], + ['1', [], 1], ['1', [[]], 
1], ['1', [1, 2, 3], 1], ['1', '', 1], ['1', ' ', 1], + ['1', 'a bee cd.', -1], ['1', '', 1], ['1', ' ', 1], ['1', '1', 0], ['1', 'a bee cd.', -1], + ['1', set([]), 1], ['1', set([1, 2, 3]), 1], ['1', {5: 3}, 1], ['1', {}, 1], ['1', {'a': 99}, 1], + ['1', {'a': 1, 'c': 3, 'b': 2}, 1], ['1', {'a': 99, 'c': 3, 'b': 5}, 1], ['1', None, 1], + ['a bee cd.', 0, 1], ['a bee cd.', 1, 1], ['a bee cd.', 2, 1], ['a bee cd.', -1, 1], + ['a bee cd.', -9999999999999999, 1], ['a bee cd.', 9999999999999999, 1], ['a bee cd.', 0.0, 1], + ['a bee cd.', inf, 1], ['a bee cd.', 3.141592653589793, 1], ['a bee cd.', [], 1], + ['a bee cd.', [[]], 1], ['a bee cd.', [1, 2, 3], 1], ['a bee cd.', '', 1], ['a bee cd.', ' ', 1], + ['a bee cd.', '1', 1], ['a bee cd.', '', 1], ['a bee cd.', ' ', 1], ['a bee cd.', '1', 1], + ['a bee cd.', 'a bee cd.', 0], ['a bee cd.', set([]), 1], ['a bee cd.', set([1, 2, 3]), 1], + ['a bee cd.', {5: 3}, 1], ['a bee cd.', {}, 1], ['a bee cd.', {'a': 99}, 1], + ['a bee cd.', {'a': 1, 'c': 3, 'b': 2}, 1], ['a bee cd.', {'a': 99, 'c': 3, 'b': 5}, 1], + ['a bee cd.', None, 1], ['', 0, 1], ['', 1, 1], ['', 2, 1], ['', -1, 1], + ['', -9999999999999999, 1], ['', 9999999999999999, 1], ['', 0.0, 1], ['', inf, 1], + ['', 3.141592653589793, 1], ['', [], 1], ['', [[]], 1], ['', [1, 2, 3], 1], ['', '', 0], + ['', ' ', -1], ['', '1', -1], ['', 'a bee cd.', -1], ['', ' ', -1], ['', '1', -1], + ['', 'a bee cd.', -1], ['', set([]), 1], ['', set([1, 2, 3]), 1], ['', {5: 3}, 1], ['', {}, 1], + ['', {'a': 99}, 1], ['', {'a': 1, 'c': 3, 'b': 2}, 1], ['', {'a': 99, 'c': 3, 'b': 5}, 1], + ['', None, 1], [' ', 0, 1], [' ', 1, 1], [' ', 2, 1], [' ', -1, 1], [' ', -9999999999999999, 1], + [' ', 9999999999999999, 1], [' ', 0.0, 1], [' ', inf, 1], [' ', 3.141592653589793, 1], + [' ', [], 1], [' ', [[]], 1], [' ', [1, 2, 3], 1], [' ', '', 1], [' ', ' ', 0], [' ', '1', -1], + [' ', 'a bee cd.', -1], [' ', '', 1], [' ', '1', -1], [' ', 'a bee cd.', -1], [' ', set([]), 1], + [' ', set([1, 2, 
3]), 1], [' ', {5: 3}, 1], [' ', {}, 1], [' ', {'a': 99}, 1], + [' ', {'a': 1, 'c': 3, 'b': 2}, 1], [' ', {'a': 99, 'c': 3, 'b': 5}, 1], [' ', None, 1], + ['1', 0, 1], ['1', 1, 1], ['1', 2, 1], ['1', -1, 1], ['1', -9999999999999999, 1], + ['1', 9999999999999999, 1], ['1', 0.0, 1], ['1', inf, 1], ['1', 3.141592653589793, 1], + ['1', [], 1], ['1', [[]], 1], ['1', [1, 2, 3], 1], ['1', '', 1], ['1', ' ', 1], ['1', '1', 0], + ['1', 'a bee cd.', -1], ['1', '', 1], ['1', ' ', 1], ['1', 'a bee cd.', -1], ['1', set([]), 1], + ['1', set([1, 2, 3]), 1], ['1', {5: 3}, 1], ['1', {}, 1], ['1', {'a': 99}, 1], + ['1', {'a': 1, 'c': 3, 'b': 2}, 1], ['1', {'a': 99, 'c': 3, 'b': 5}, 1], ['1', None, 1], + ['a bee cd.', 0, 1], ['a bee cd.', 1, 1], ['a bee cd.', 2, 1], ['a bee cd.', -1, 1], + ['a bee cd.', -9999999999999999, 1], ['a bee cd.', 9999999999999999, 1], ['a bee cd.', 0.0, 1], + ['a bee cd.', inf, 1], ['a bee cd.', 3.141592653589793, 1], ['a bee cd.', [], 1], + ['a bee cd.', [[]], 1], ['a bee cd.', [1, 2, 3], 1], ['a bee cd.', '', 1], ['a bee cd.', ' ', 1], + ['a bee cd.', '1', 1], ['a bee cd.', 'a bee cd.', 0], ['a bee cd.', '', 1], ['a bee cd.', ' ', 1], + ['a bee cd.', '1', 1], ['a bee cd.', set([]), 1], ['a bee cd.', set([1, 2, 3]), 1], + ['a bee cd.', {5: 3}, 1], ['a bee cd.', {}, 1], ['a bee cd.', {'a': 99}, 1], + ['a bee cd.', {'a': 1, 'c': 3, 'b': 2}, 1], ['a bee cd.', {'a': 99, 'c': 3, 'b': 5}, 1], + ['a bee cd.', None, 1], [set([]), 0, 1], [set([]), 1, 1], [set([]), 2, 1], [set([]), -1, 1], + [set([]), -9999999999999999, 1], [set([]), 9999999999999999, 1], [set([]), 0.0, 1], + [set([]), inf, 1], [set([]), 3.141592653589793, 1], [set([]), [], 1], [set([]), [[]], 1], + [set([]), [1, 2, 3], 1], [set([]), '', -1], [set([]), ' ', -1], [set([]), '1', -1], + [set([]), 'a bee cd.', -1], [set([]), '', -1], [set([]), ' ', -1], [set([]), '1', -1], + [set([]), 'a bee cd.', -1], + [set([]), set([1, 2, 3]), 'TypeError: cannot compare sets using cmp()'], [set([]), {5: 3}, 1], + 
[set([]), {}, 1], [set([]), {'a': 99}, 1], [set([]), {'a': 1, 'c': 3, 'b': 2}, 1], + [set([]), {'a': 99, 'c': 3, 'b': 5}, 1], [set([]), None, 1], [set([1, 2, 3]), 0, 1], + [set([1, 2, 3]), 1, 1], [set([1, 2, 3]), 2, 1], [set([1, 2, 3]), -1, 1], + [set([1, 2, 3]), -9999999999999999, 1], [set([1, 2, 3]), 9999999999999999, 1], + [set([1, 2, 3]), 0.0, 1], [set([1, 2, 3]), inf, 1], [set([1, 2, 3]), 3.141592653589793, 1], + [set([1, 2, 3]), [], 1], [set([1, 2, 3]), [[]], 1], [set([1, 2, 3]), [1, 2, 3], 1], + [set([1, 2, 3]), '', -1], [set([1, 2, 3]), ' ', -1], [set([1, 2, 3]), '1', -1], + [set([1, 2, 3]), 'a bee cd.', -1], [set([1, 2, 3]), '', -1], [set([1, 2, 3]), ' ', -1], + [set([1, 2, 3]), '1', -1], [set([1, 2, 3]), 'a bee cd.', -1], + [set([1, 2, 3]), set([]), 'TypeError: cannot compare sets using cmp()'], + [set([1, 2, 3]), {5: 3}, 1], [set([1, 2, 3]), {}, 1], [set([1, 2, 3]), {'a': 99}, 1], + [set([1, 2, 3]), {'a': 1, 'c': 3, 'b': 2}, 1], [set([1, 2, 3]), {'a': 99, 'c': 3, 'b': 5}, 1], + [set([1, 2, 3]), None, 1], [{5: 3}, 0, 1], [{5: 3}, 1, 1], [{5: 3}, 2, 1], [{5: 3}, -1, 1], + [{5: 3}, -9999999999999999, 1], [{5: 3}, 9999999999999999, 1], [{5: 3}, 0.0, 1], [{5: 3}, inf, 1], + [{5: 3}, 3.141592653589793, 1], [{5: 3}, [], -1], [{5: 3}, [[]], -1], [{5: 3}, [1, 2, 3], -1], + [{5: 3}, '', -1], [{5: 3}, ' ', -1], [{5: 3}, '1', -1], [{5: 3}, 'a bee cd.', -1], + [{5: 3}, '', -1], [{5: 3}, ' ', -1], [{5: 3}, '1', -1], [{5: 3}, 'a bee cd.', -1], + [{5: 3}, set([]), -1], [{5: 3}, set([1, 2, 3]), -1], [{5: 3}, {}, 1], [{5: 3}, {'a': 99}, -1], + [{5: 3}, {'a': 1, 'c': 3, 'b': 2}, -1], [{5: 3}, {'a': 99, 'c': 3, 'b': 5}, -1], [{5: 3}, None, 1], + [{}, 0, 1], [{}, 1, 1], [{}, 2, 1], [{}, -1, 1], [{}, -9999999999999999, 1], + [{}, 9999999999999999, 1], [{}, 0.0, 1], [{}, inf, 1], [{}, 3.141592653589793, 1], [{}, [], -1], + [{}, [[]], -1], [{}, [1, 2, 3], -1], [{}, '', -1], [{}, ' ', -1], [{}, '1', -1], + [{}, 'a bee cd.', -1], [{}, '', -1], [{}, ' ', -1], [{}, '1', -1], [{}, 
'a bee cd.', -1], + [{}, set([]), -1], [{}, set([1, 2, 3]), -1], [{}, {5: 3}, -1], [{}, {'a': 99}, -1], + [{}, {'a': 1, 'c': 3, 'b': 2}, -1], [{}, {'a': 99, 'c': 3, 'b': 5}, -1], [{}, None, 1], + [{'a': 99}, 0, 1], [{'a': 99}, 1, 1], [{'a': 99}, 2, 1], [{'a': 99}, -1, 1], + [{'a': 99}, -9999999999999999, 1], [{'a': 99}, 9999999999999999, 1], [{'a': 99}, 0.0, 1], + [{'a': 99}, inf, 1], [{'a': 99}, 3.141592653589793, 1], [{'a': 99}, [], -1], [{'a': 99}, [[]], -1], + [{'a': 99}, [1, 2, 3], -1], [{'a': 99}, '', -1], [{'a': 99}, ' ', -1], [{'a': 99}, '1', -1], + [{'a': 99}, 'a bee cd.', -1], [{'a': 99}, '', -1], [{'a': 99}, ' ', -1], [{'a': 99}, '1', -1], + [{'a': 99}, 'a bee cd.', -1], [{'a': 99}, set([]), -1], [{'a': 99}, set([1, 2, 3]), -1], + [{'a': 99}, {5: 3}, 1], [{'a': 99}, {}, 1], [{'a': 99}, {'a': 1, 'c': 3, 'b': 2}, -1], + [{'a': 99}, {'a': 99, 'c': 3, 'b': 5}, -1], [{'a': 99}, None, 1], [{'a': 1, 'c': 3, 'b': 2}, 0, 1], + [{'a': 1, 'c': 3, 'b': 2}, 1, 1], [{'a': 1, 'c': 3, 'b': 2}, 2, 1], + [{'a': 1, 'c': 3, 'b': 2}, -1, 1], [{'a': 1, 'c': 3, 'b': 2}, -9999999999999999, 1], + [{'a': 1, 'c': 3, 'b': 2}, 9999999999999999, 1], [{'a': 1, 'c': 3, 'b': 2}, 0.0, 1], + [{'a': 1, 'c': 3, 'b': 2}, inf, 1], [{'a': 1, 'c': 3, 'b': 2}, 3.141592653589793, 1], + [{'a': 1, 'c': 3, 'b': 2}, [], -1], [{'a': 1, 'c': 3, 'b': 2}, [[]], -1], + [{'a': 1, 'c': 3, 'b': 2}, [1, 2, 3], -1], [{'a': 1, 'c': 3, 'b': 2}, '', -1], + [{'a': 1, 'c': 3, 'b': 2}, ' ', -1], [{'a': 1, 'c': 3, 'b': 2}, '1', -1], + [{'a': 1, 'c': 3, 'b': 2}, 'a bee cd.', -1], [{'a': 1, 'c': 3, 'b': 2}, '', -1], + [{'a': 1, 'c': 3, 'b': 2}, ' ', -1], [{'a': 1, 'c': 3, 'b': 2}, '1', -1], + [{'a': 1, 'c': 3, 'b': 2}, 'a bee cd.', -1], [{'a': 1, 'c': 3, 'b': 2}, set([]), -1], + [{'a': 1, 'c': 3, 'b': 2}, set([1, 2, 3]), -1], [{'a': 1, 'c': 3, 'b': 2}, {5: 3}, 1], + [{'a': 1, 'c': 3, 'b': 2}, {}, 1], [{'a': 1, 'c': 3, 'b': 2}, {'a': 99}, 1], + [{'a': 1, 'c': 3, 'b': 2}, {'a': 99, 'c': 3, 'b': 5}, -1], [{'a': 1, 'c': 3, 
'b': 2}, None, 1], + [{'a': 99, 'c': 3, 'b': 5}, 0, 1], [{'a': 99, 'c': 3, 'b': 5}, 1, 1], + [{'a': 99, 'c': 3, 'b': 5}, 2, 1], [{'a': 99, 'c': 3, 'b': 5}, -1, 1], + [{'a': 99, 'c': 3, 'b': 5}, -9999999999999999, 1], + [{'a': 99, 'c': 3, 'b': 5}, 9999999999999999, 1], [{'a': 99, 'c': 3, 'b': 5}, 0.0, 1], + [{'a': 99, 'c': 3, 'b': 5}, inf, 1], [{'a': 99, 'c': 3, 'b': 5}, 3.141592653589793, 1], + [{'a': 99, 'c': 3, 'b': 5}, [], -1], [{'a': 99, 'c': 3, 'b': 5}, [[]], -1], + [{'a': 99, 'c': 3, 'b': 5}, [1, 2, 3], -1], [{'a': 99, 'c': 3, 'b': 5}, '', -1], + [{'a': 99, 'c': 3, 'b': 5}, ' ', -1], [{'a': 99, 'c': 3, 'b': 5}, '1', -1], + [{'a': 99, 'c': 3, 'b': 5}, 'a bee cd.', -1], [{'a': 99, 'c': 3, 'b': 5}, '', -1], + [{'a': 99, 'c': 3, 'b': 5}, ' ', -1], [{'a': 99, 'c': 3, 'b': 5}, '1', -1], + [{'a': 99, 'c': 3, 'b': 5}, 'a bee cd.', -1], [{'a': 99, 'c': 3, 'b': 5}, set([]), -1], + [{'a': 99, 'c': 3, 'b': 5}, set([1, 2, 3]), -1], [{'a': 99, 'c': 3, 'b': 5}, {5: 3}, 1], + [{'a': 99, 'c': 3, 'b': 5}, {}, 1], [{'a': 99, 'c': 3, 'b': 5}, {'a': 99}, 1], + [{'a': 99, 'c': 3, 'b': 5}, {'a': 1, 'c': 3, 'b': 2}, 1], [{'a': 99, 'c': 3, 'b': 5}, None, 1], + [None, 0, -1], [None, 1, -1], [None, 2, -1], [None, -1, -1], [None, -9999999999999999, -1], + [None, 9999999999999999, -1], [None, 0.0, -1], [None, inf, -1], [None, 3.141592653589793, -1], + [None, [], -1], [None, [[]], -1], [None, [1, 2, 3], -1], [None, '', -1], [None, ' ', -1], + [None, '1', -1], [None, 'a bee cd.', -1], [None, '', -1], [None, ' ', -1], [None, '1', -1], + [None, 'a bee cd.', -1], [None, set([]), -1], [None, set([1, 2, 3]), -1], [None, {5: 3}, -1], + [None, {}, -1], [None, {'a': 99}, -1], [None, {'a': 1, 'c': 3, 'b': 2}, -1], + [None, {'a': 99, 'c': 3, 'b': 5}, -1]]