element.
+ # For black navbar, do "navbar navbar-inverse"
+ 'navbar_class': "navbar navbar-inverse",
+
+ # Fix navigation bar to top of page?
+ # Values: "true" (default) or "false"
+ 'navbar_fixed_top': "true",
+
+ # Location of link to source.
+ # Options are "nav" (default), "footer" or anything else to exclude.
+ 'source_link_position': "none",
+
+ # Bootswatch (http://bootswatch.com/) theme.
+ #
+ # Options are nothing with "" (default) or the name of a valid theme
+ # such as "amelia" or "cosmo" or "united".
+ 'bootswatch_theme': "cerulean",
+
+ # Choose Bootstrap version.
+ # Values: "3" (default) or "2" (in quotes)
+ 'bootstrap_version': "3",
+}
+
# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []
@@ -114,12 +183,12 @@
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
-html_logo = '_static/python-future-logo.png'
+html_logo = '_static/python-future-logo-textless-transparent.png'
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
-#html_favicon = None
+html_favicon = "_static/python-future-icon-32.ico"
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
@@ -136,9 +205,12 @@
# Custom sidebar templates, maps document names to template names.
html_sidebars = {
- 'index': ['sidebarlogo.html', 'sidebarintro.html',
- 'sourcelink.html', 'searchbox.html'],
- '**': ['sidebarlogo.html', 'localtoc.html', 'relations.html', 'sourcelink.html', 'searchbox.html']
+ '**': ['sidebarintro.html',
+ 'sidebartoc.html',
+ # 'sourcelink.html',
+ #'searchbox.html',
+ ]
+ # '**': ['sidebarlogo.html', 'localtoc.html', 'relations.html', 'sourcelink.html', 'searchbox.html']
}
# Additional templates that should be rendered to pages, maps page names to
@@ -155,10 +227,10 @@
#html_split_index = False
# If true, links to the reST sources are added to the pages.
-#html_show_sourcelink = True
+html_show_sourcelink = False
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
-#html_show_sphinx = True
+html_show_sphinx = False
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True
@@ -242,7 +314,7 @@
# dir menu entry, description, category)
texinfo_documents = [
('index', 'Python-Future', u'Python-Future Documentation',
- u'Python Charmers', 'Python-Future', 'Easy support for Python 2 and 3',
+ u'Python Charmers', 'Python-Future', 'Easy compatibility for Python 2 and 3',
'Miscellaneous'),
]
diff --git a/docs/contents.rst.inc b/docs/contents.rst.inc
index e6935849..869b3642 100644
--- a/docs/contents.rst.inc
+++ b/docs/contents.rst.inc
@@ -1,25 +1,26 @@
-Contents:
----------
+Contents
+========
.. toctree::
- :maxdepth: 2
+ :maxdepth: 3
+ whatsnew
overview
quickstart
+ compatible_idioms
imports
what_else
automatic_conversion
- porting
- standard_library_incompatibilities
faq
+ stdlib_incompatibilities
+ older_interfaces
changelog
credits
reference
Indices and tables
-------------------
+******************
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
-
diff --git a/docs/conversion_limitations.rst b/docs/conversion_limitations.rst
new file mode 100644
index 00000000..c2b15303
--- /dev/null
+++ b/docs/conversion_limitations.rst
@@ -0,0 +1,27 @@
+.. _futurize-limitations:
+
+Known limitations
+-----------------
+
+``futurize`` and ``pasteurize`` are useful to automate much of the
+work of porting, particularly the boring repetitive text substitutions. They also
+help to flag which parts of the code require attention.
+
+Nevertheless, ``futurize`` and ``pasteurize`` are still incomplete and make
+some mistakes, like 2to3, on which they are based. Please report bugs on
+`GitHub <https://github.com/PythonCharmers/python-future/issues>`_. Contributions to
+the ``lib2to3``-based fixers for ``futurize`` and ``pasteurize`` are
+particularly welcome! Please see :ref:`contributing`.
+
+``futurize`` doesn't currently make the following change automatically:
+
+1. Strings containing ``\U`` produce a ``SyntaxError`` on Python 3. An example is::
+
+ s = 'C:\Users'
+
+ Python 2 expands this to ``s = 'C:\\Users'``, but Python 3 requires a raw
+ prefix (``r'...'``). This also applies to multi-line strings (including
+ multi-line docstrings).
+
+Also see the tests in ``future/tests/test_futurize.py`` marked
+``@expectedFailure`` or ``@skip`` for known limitations.
diff --git a/docs/credits.rst b/docs/credits.rst
index be1d6e25..4c029efd 100644
--- a/docs/credits.rst
+++ b/docs/credits.rst
@@ -1,41 +1,25 @@
-Credits
-=======
-
-:Author: Ed Schofield
-:Sponsor: Python Charmers Pty Ltd, Australia, and Python Charmers Pte
- Ltd, Singapore. http://pythoncharmers.com
-:Others: - The backported ``super()`` and ``range()`` functions are
- derived from Ryan Kelly's ``magicsuper`` module and Dan
- Crosta's ``xrange`` module.
- - The ``futurize`` script uses ``lib2to3``, ``lib3to2``, and
- parts of Armin Ronacher's ``python-modernize`` code.
- - The ``python_2_unicode_compatible`` decorator is from
- Django. The ``implements_iterator`` and ``with_metaclass``
- decorators are from Jinja2.
- - ``future`` incorporates the ``six`` module by Benjamin
- Peterson as ``future.utils.six``.
- - Documentation is generated using ``sphinx`` using an
- adaptation of Armin Ronacher's stylesheets from Jinja2.
+Licensing and credits
+=====================
.. _licence:
-Licensing
----------
+Licence
+-------
The software is distributed under an MIT licence. The text is as follows
-(from LICENSE.txt):
+(from ``LICENSE.txt``)::
+
+ Copyright (c) 2013-2024 Python Charmers, Australia
- Copyright (c) 2013-2014 Python Charmers Pty Ltd, Australia
-
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
-
+
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
-
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -44,3 +28,154 @@ The software is distributed under an MIT licence. The text is as follows
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
+.. _sponsor:
+
+Sponsors
+--------
+
+Python Charmers: https://pythoncharmers.com
+
+.. _authors:
+
+Author
+-------
+
+Python-Future was largely written by Ed Schofield.
+
+Maintainers
+-----------
+
+The project is no longer being actively maintained. Like Python 2, it should be
+considered end-of-life.
+
+Past maintainers include:
+
+- Jordan M. Adler
+- Liuyang Wan
+- Ed Schofield
+
+Contributors
+------------
+
+Thanks to the following people for helping to improve the package:
+
+- Jordan Adler
+- Jeroen Akkerman
+- Bruno Alla
+- Kyle Altendorf
+- Nuno André
+- Kian-Meng Ang
+- Grant Bakker
+- Jacob Beck
+- David Bern
+- Fumihiro (Ben) Bessho
+- Shiva Bhusal
+- Andrew Bjonnes
+- Nate Bogdanowicz
+- Tomer Chachamu
+- Christian Clauss
+- Denis Cornehl
+- Joseph Curtis
+- Nicolas Delaby
+- Chad Dombrova
+- Jon Dufresne
+- Corey Farwell
+- Eric Firing
+- Joe Gordon
+- Gabriela Gutierrez
+- Maximilian Hils
+- Tomáš Hrnčiar
+- Miro Hrončok
+- Mark Huang
+- Martijn Jacobs
+- Michael Joseph
+- Waldemar Kornewald
+- Alexey Kotlyarov
+- Steve Kowalik
+- Lion Krischer
+- Marcin Kuzminski
+- Joshua Landau
+- German Larrain
+- Chris Lasher
+- ghanshyam lele
+- Calum Lind
+- Tobias Megies
+- Anika Mukherji
+- Jon Parise
+- Matthew Parnell
+- Tom Picton
+- Sebastian Potasiak
+- Miga Purg
+- Éloi Rivard
+- Greg Roodt
+- Sesh Sadasivam
+- Elliott Sales de Andrade
+- Aiden Scandella
+- Yury Selivanov
+- Alexander Shadchin
+- Tim Shaffer
+- Christopher Slycord
+- Sameera Somisetty
+- Nicola Soranzo
+- Louis Sautier
+- Will Shanks
+- Gregory P. Smith
+- Chase Sterling
+- Matthew Stidham
+- Daniel Szoska
+- Flaviu Tamas
+- Roman A. Taycher
+- Jeff Tratner
+- Tim Tröndle
+- Brad Walker
+- Liuyang Wan
+- Andrew Wason
+- Jeff Widman
+- Dan Yeaw
+- Hackalog (GitHub user)
+- lsm (GitHub user)
+- Mystic-Mirage (GitHub user)
+- str4d (GitHub user)
+- ucodery (GitHub user)
+- urain39 (GitHub user)
+- 9seconds (GitHub user)
+- Varriount (GitHub user)
+- zihzihtw (GitHub user)
+
+Suggestions and Feedback
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+- Chris Adams
+- Martijn Faassen
+- Joe Gordon
+- Lion Krischer
+- Danielle Madeley
+- Val Markovic
+- wluebbe (GitHub user)
+
+
+Other Credits
+-------------
+
+- The backported ``super()`` and ``range()`` functions are derived from Ryan
+ Kelly's ``magicsuper`` module and Dan Crosta's ``xrange`` module.
+
+- The ``futurize`` and ``pasteurize`` scripts use ``lib2to3``, ``lib3to2``, and
+ parts of Armin Ronacher's ``python-modernize`` code.
+
+- The ``python_2_unicode_compatible`` decorator is from Django. The
+ ``implements_iterator`` and ``with_metaclass`` decorators are from Jinja2.
+
+- The ``exec_`` function and some others in ``future.utils`` are from the
+ ``six`` module by Benjamin Peterson.
+
+- The ``raise_`` and ``raise_with_traceback`` functions were contributed by
+ Jeff Tratner.
+
+- A working version of ``raise_from`` was contributed by Varriount (GitHub).
+
+- Documentation is generated with `Sphinx <http://sphinx-doc.org/>`_ using the
+ ``sphinx-bootstrap`` theme.
+
+- ``past.translation`` is inspired by and borrows some code from Sanjay Vinip's
+ ``uprefix`` module.
diff --git a/docs/custom_iterators.rst b/docs/custom_iterators.rst
index 712e00a6..6ff389a4 100644
--- a/docs/custom_iterators.rst
+++ b/docs/custom_iterators.rst
@@ -3,26 +3,92 @@
Custom iterators
----------------
-If you define your own iterators, there is an incompatibility in the
-method name across Py3 and Py2. On Python 3 it is ``__next__``, whereas
-on Python 2 it is ``next``.
+If you define your own iterators, there is an incompatibility in the method name
+to retrieve the next item across Py3 and Py2. On Python 3 it is ``__next__``,
+whereas on Python 2 it is ``next``.
-Use the following decorator to allow Py3-style iterators to work
-identically on Py2::
+The most elegant solution to this is to derive your custom iterator class from
+``builtins.object`` and define a ``__next__`` method as you normally
+would on Python 3. On Python 2, ``object`` then refers to the
+``future.types.newobject`` base class, which provides a fallback ``next``
+method that calls your ``__next__``. Use it as follows::
- from future.utils import implements_iterator
+ from builtins import object
- @implements_iterator
class Upper(object):
def __init__(self, iterable):
self._iter = iter(iterable)
- def __next__(self): # note the Py3 interface
+ def __next__(self): # Py3-style iterator interface
+ return next(self._iter).upper()
+ def __iter__(self):
+ return self
+
+ itr = Upper('hello')
+ assert next(itr) == 'H'
+ assert next(itr) == 'E'
+ assert list(itr) == list('LLO')
+
+
+You can use this approach unless you are defining a custom iterator as a
+subclass of a base class defined elsewhere that does not derive from
+``newobject``. In that case, you can provide compatibility across
+Python 2 and Python 3 using the ``next`` function from ``future.builtins``::
+
+ from builtins import next
+
+ from some_module import some_base_class
+
+ class Upper2(some_base_class):
+ def __init__(self, iterable):
+ self._iter = iter(iterable)
+ def __next__(self): # Py3-style iterator interface
return next(self._iter).upper()
def __iter__(self):
return self
- print(list(Upper('hello')))
+ itr2 = Upper2('hello')
+ assert next(itr2) == 'H'
+ assert next(itr2) == 'E'
+
+``next()`` also works with regular Python 2 iterators with a ``.next`` method::
+
+ itr3 = iter(['one', 'three', 'five'])
+ assert 'next' in dir(itr3)
+ assert next(itr3) == 'one'
+
+This approach is feasible whenever your code calls the ``next()`` function
+explicitly. If you consume the iterator implicitly in a ``for`` loop or
+``list()`` call or by some other means, the ``future.builtins.next`` function
+will not help; the third assertion below would fail on Python 2::
+
+ itr2 = Upper2('hello')
+
+ assert next(itr2) == 'H'
+ assert next(itr2) == 'E'
+ assert list(itr2) == list('LLO') # fails because Py2 implicitly looks
+ # for a ``next`` method.
+
+Instead, you can use a decorator called ``implements_iterator`` from
+``future.utils`` to allow Py3-style iterators to work identically on Py2, even
+if they don't inherit from ``future.builtins.object``. Use it as follows::
+
+ from future.utils import implements_iterator
+
+ Upper2 = implements_iterator(Upper2)
+
+ print(list(Upper2('hello')))
# prints ['H', 'E', 'L', 'L', 'O']
-On Python 3 this decorator does nothing.
+This can of course also be used with the ``@`` decorator syntax when defining
+the iterator as follows::
+
+ @implements_iterator
+ class Upper2(some_base_class):
+ def __init__(self, iterable):
+ self._iter = iter(iterable)
+ def __next__(self): # note the Py3 interface
+ return next(self._iter).upper()
+ def __iter__(self):
+ return self
+On Python 3, as usual, this decorator does nothing.
diff --git a/docs/custom_str_methods.rst b/docs/custom_str_methods.rst
index d45eb0dc..12c3c6b3 100644
--- a/docs/custom_str_methods.rst
+++ b/docs/custom_str_methods.rst
@@ -18,7 +18,7 @@ Py2 and define ``__str__`` to encode it as utf-8::
return u'Unicode string: \u5b54\u5b50'
a = MyClass()
- # This then prints the Chinese characters for Confucius:
+ # This then prints the name of a Chinese philosopher:
print(a)
This decorator is identical to the decorator of the same name in
diff --git a/docs/dev_notes.rst b/docs/dev_notes.rst
new file mode 100644
index 00000000..6985bca4
--- /dev/null
+++ b/docs/dev_notes.rst
@@ -0,0 +1,16 @@
+Notes
+-----
+This module only supports Python 2.7, and Python 3.4+.
+
+The following renames are already supported on Python 2.7 without any
+additional work from us::
+
+ reload() -> imp.reload()
+ reduce() -> functools.reduce()
+ StringIO.StringIO -> io.StringIO
+ Bytes.BytesIO -> io.BytesIO
+
+Old things that can one day be fixed automatically by futurize.py::
+
+ string.uppercase -> string.ascii_uppercase # works on either Py2.7 or Py3+
+ sys.maxint -> sys.maxsize # but this isn't identical
diff --git a/docs/development.rst b/docs/development.rst
new file mode 100644
index 00000000..a12f2ca5
--- /dev/null
+++ b/docs/development.rst
@@ -0,0 +1,19 @@
+.. developer-docs
+
+Developer docs
+==============
+
+The easiest way to start developing ``python-future`` is as follows:
+
+1. Install Anaconda Python distribution
+
+2. Run::
+
+ conda install -n future2 python=2.7 pip
+ conda install -n future3 python=3.4 pip
+
+ git clone https://github.com/PythonCharmers/python-future
+
+3. If you are using Anaconda Python distribution, this comes without a ``test``
+module on Python 2.x. Copy ``Python-2.7.6/Lib/test`` from the Python source tree
+to ``~/anaconda/envs/yourenvname/lib/python2.7/site-packages/``.
diff --git a/docs/dict_object.rst b/docs/dict_object.rst
index 4f3a594c..165cf763 100644
--- a/docs/dict_object.rst
+++ b/docs/dict_object.rst
@@ -7,85 +7,86 @@ Python 3 dictionaries have ``.keys()``, ``.values()``, and ``.items()``
methods which return memory-efficient set-like iterator objects, not lists.
(See `PEP 3106 <https://www.python.org/dev/peps/pep-3106/>`_.)
-``future.builtins`` provides a Python 2 ``dict`` subclass whose :func:`keys`,
-:func:`values`, and :func:`items` methods return iterators. On Python 2.7,
-these iterators have the same set-like view behaviour as dictionaries in
-Python 3. This can streamline code needing to iterate over large dictionaries.
-For example::
+If your dictionaries are small, performance is not critical, and you don't need
+the set-like behaviour of iterator objects from Python 3, you can of course
+stick with standard Python 3 code in your Py2/3 compatible codebase::
+
+ # Assuming d is a native dict ...
+
+ for key in d:
+ # code here
+
+ for item in d.items():
+ # code here
+
+ for value in d.values():
+ # code here
+
+In this case there will be memory overhead of list creation on Py2 for each
+call to ``items``, ``values`` or ``keys``.
+
+For improved efficiency, ``future.builtins`` (aliased to ``builtins``) provides
+a Python 2 ``dict`` subclass whose :func:`keys`, :func:`values`, and
+:func:`items` methods return iterators on all versions of Python >= 2.7. On
+Python 2.7, these iterators also have the same set-like view behaviour as
+dictionaries in Python 3. This can streamline code that iterates over large
+dictionaries. For example::
from __future__ import print_function
- from future.builtins import dict, range
-
+ from builtins import dict, range
+
# Memory-efficient construction:
d = dict((i, i**2) for i in range(10**7))
-
+
assert not isinstance(d.items(), list)
-
- # Because items() is memory-efficient, so is this:
- d2 = dict((i_squared, i) for (i, i_squared) in d.items())
+ # Because items() is memory-efficient, so is this:
+ d2 = dict((v, k) for (k, v) in d.items())
-On Python 2.6, these methods currently return iterators that do not support the
-new Py3 set-like behaviour.
+As usual, on Python 3 ``dict`` imported from either ``builtins`` or
+``future.builtins`` is just the built-in ``dict`` class.
Memory-efficiency and alternatives
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-If you already have large native dictionaries, the downside to wrapping them in a
-``dict`` call is that memory is copied (on both Py3 and with
-``future.builtins.dict``). For example::
+If you already have large native dictionaries, the downside to wrapping them in
+a ``dict`` call is that memory is copied (on both Py3 and on Py2). For
+example::
- # Currently, this allocates and then frees a large amount of temporary
- # memory:
+ # This allocates and then frees a large amount of temporary memory:
d = dict({i: i**2 for i in range(10**7)})
If dictionary methods like ``values`` and ``items`` are called only once, this
-obviously negates the memory benefits the overridden methods offer with not
-creating temporary lists.
+obviously negates the memory benefits offered by the overridden methods through
+not creating temporary lists.
-The memory-efficient (and CPU-efficient) alternatives are either::
+The memory-efficient (and CPU-efficient) alternatives are:
-- to construct a dictionary from an iterator -- as above with the generator expression ``dict((i, i**2) for i in range(10**7)``;
-- to construct an empty dictionary with a ``dict()`` call using ``future.builtins.dict`` (rather than ``{}``) and update it incrementally;
-- to use
+- to construct a dictionary from an iterator. The above line could use a
+ generator like this::
-If your dictionaries are small or performance is not critical, you can of course stick
-with standard Python 3 code in your Py2/3 compatible codebase::
-
- # Assuming d is a native dict ...
+ d = dict((i, i**2) for i in range(10**7))
- for item in d:
- # code here
+- to construct an empty dictionary with a ``dict()`` call using
+ ``builtins.dict`` (rather than ``{}``) and then update it;
- for item in d.items():
- # code here
-
- for value in d.values():
- # code here
-
-In this case there will be memory overhead of list creation for each call of
-``items``, ``values`` or ``keys``.
-
-If your dictionaries are large, or if you want to use the Python 3
-set-like behaviour on both Py3 and Python 2.7, then you can instead use the
-``viewkeys`` etc. functions from :mod:`future.utils`, passing in regular
-dictionaries::
+- to use the ``viewitems`` etc. functions from :mod:`future.utils`, passing in
+ regular dictionaries::
from future.utils import viewkeys, viewvalues, viewitems
for (key, value) in viewitems(hugedictionary):
# some code here
-
+
# Set intersection:
d = {i**2: i for i in range(1000)}
both = viewkeys(d) & set(range(0, 1000, 7))
-
+
# Set union:
both = viewvalues(d1) | viewvalues(d2)
-For Python 2.6 compatibility, the functions ``iteritems`` etc. are also
-available in :mod:`future.utils`. These are equivalent to the functions of the
-same names in ``six``, which is equivalent to calling the ``iteritems`` etc.
-methods on Python 2, or to calling ``items`` etc. on Python 3.
-
+For compatibility, the functions ``iteritems`` etc. are also available in
+:mod:`future.utils`. These are equivalent to the functions of the same names in
+``six``, which is equivalent to calling the ``iteritems`` etc. methods on
+Python 2, or to calling ``items`` etc. on Python 3.
diff --git a/docs/faq.rst b/docs/faq.rst
index 93a74c49..e49adf61 100644
--- a/docs/faq.rst
+++ b/docs/faq.rst
@@ -4,15 +4,11 @@ Frequently Asked Questions (FAQ)
Who is this for?
================
-1. People who would prefer to write clean, future-proof Python
-3-compatible code, but whose day-jobs require that their code still run
-on a Python 2 stack.
+1. People with existing or new Python 3 codebases who wish to provide
+ongoing Python 2.7 support easily and with little maintenance burden.
-2. People who wish to simplify migration of their codebases to Python
-3.3+, module by module, without giving up Python 2 compatibility.
-
-3. People with existing or new Python 3 codebases who wish to provide
-ongoing Python 2.6 / 2.7 support easily and with little maintenance burden.
+2. People who wish to ease and accelerate migration of their Python 2 codebases
+to Python 3.3+, module by module, without giving up Python 2 compatibility.
Why upgrade to Python 3?
@@ -28,23 +24,23 @@ Python 2.7 is the end of the Python 2 line. (See `PEP 404
<https://www.python.org/dev/peps/pep-0404/>`_.) The language and standard
libraries are improving only in Python 3.x.
-Python 3.3 is a better language and better set of standard libraries than
-Python 2.x in almost every way. Python 3 is cleaner, less warty, and easier to
+Python 3.x is a better language and better set of standard libraries than
+Python 2.x in many ways. Python 3.x is cleaner, less warty, and easier to
learn than Python 2. It has better memory efficiency, easier Unicode handling,
-and powerful new features like function annotations and the `asyncio
-`_ module.
+and powerful new features like the `asyncio
+<https://docs.python.org/3/library/asyncio.html>`_ module.
.. Unicode handling is also much easier. For example, see `this page
.. `_
.. describing some of the problems with handling Unicode on Python 2 that
-.. Python 3 mostly solves.
+.. Python 3 mostly solves.
Porting philosophy
==================
-Why use this approach?
-----------------------
+Why write Python 3-style code?
+------------------------------
Here are some quotes:
@@ -65,14 +61,15 @@ Here are some quotes:
difficulties associated with the Python 3 transition (like
distinguishing their 8-bit text strings from their binary data). They
shouldn't be punished with additional code changes ..." from `PEP 414
- `_ by Nick Coghlan.
+ <https://www.python.org/dev/peps/pep-0414/>`_ by Armin Ronacher and Nick
+ Coghlan.
Can't I just roll my own Py2/3 compatibility layer?
---------------------------------------------------
-Yes, but using ``future`` will probably lead to cleaner code with fewer
-bugs.
+Yes, but using ``python-future`` will probably be easier and lead to cleaner
+code with fewer bugs.
Consider this quote:
@@ -87,64 +84,73 @@ Consider this quote:
``future`` also includes various Py2/3 compatibility tools in
:mod:`future.utils` picked from large projects (including IPython,
-Django, Jinja2, Pandas), which should hopefully reduce the burden on
-every project to roll its own py3k compatibility wrapper module.
+Django, Jinja2, Pandas), which should reduce the burden on every project to
+roll its own py3k compatibility wrapper module.
+
+What inspired this project?
+---------------------------
-How did the original need for this arise?
------------------------------------------
+In our Python training courses, we at `Python Charmers
+<https://pythoncharmers.com>`_ faced a dilemma: teach people Python 3, which was
+future-proof but not as useful to them today because of weaker 3rd-party
+package support, or teach people Python 2, which was more useful today but
+would require them to change their code and unlearn various habits soon. We
+searched for ways to avoid polluting the world with more deprecated code, but
+didn't find a good way.
-In teaching Python, we at Python Charmers faced a dilemma: teach people
-Python 3, which was future-proof but not as useful to them today because
-of weaker 3rd-party package support, or teach people Python 2, which was
-more useful today but would require them to change their code and unlearn
-various habits soon. We searched for ways to avoid polluting the world
-with more deprecated code, but didn't find a good way.
+Also, in attempting to help with porting packages such as `scikit-learn
+<https://scikit-learn.org>`_ to Python 3, I (Ed) was dissatisfied with how much
+code cruft was necessary to introduce to support Python 2 and 3 from a single
+codebase (the preferred porting option). Since backward-compatibility with
+Python 2 may be necessary for at least the next 5 years, one of the promised
+benefits of Python 3 -- cleaner code with fewer of Python 2's warts -- was
+difficult to realize before in practice in a single codebase that supported
+both platforms.
-Also, in attempting to help with porting packages such as
-``scikit-learn`` to Python 3, I was dissatisfied with how much code cruft
-was necessary to introduce to support Python 2 and 3 from a single
-codebase (the preferred porting option). Since backward-compatibility
-with Python 2 may be necessary for at least the next 5 years, one of the
-promised benefits of Python 3 -- cleaner code with fewer of Python 2's
-warts -- was difficult to realize before in practice in a single codebase
-that supported both platforms.
+The goal is to accelerate the uptake of Python 3 and help the strong Python
+community to remain united around a single version of the language.
Maturity
========
-Is it tested?
--------------
+How well has it been tested?
+----------------------------
+
+``future`` is used by thousands of projects and has been downloaded over 1.7 billion times. Some projects like Sage have used it to port 800,000+ lines of Python 2 code to Python 2/3.
+
+Currently ``python-future`` has over 1000 unit tests. Many of these are straight
+from the Python 3.3 and 3.4 test suites.
+
+In general, the ``future`` package itself is in good shape, whereas the
+``futurize`` script for automatic porting is imperfect; chances are it will
+require some manual cleanup afterwards. The ``past`` package also needs to be
+expanded.
+
-``future`` currently has 300+ unit tests. In general, the ``future`` package
-itself is in good shape, whereas the ``futurize`` script for automatic porting
-is incomplete and imperfect. (Chances are it will require some manual cleanup
-afterwards.)
-
Is the API stable?
------------------
-Not yet; ``future`` is still in beta. We will try not to break anything which
-was documented and used to work. After version 1.0 is released, the API will
-not change in backward-incompatible ways until a hypothetical version 2.0.
+Yes; ``future`` is mature. We'll make very few changes from here, trying not to
+break anything which was documented and used to work.
..
Are there any example of Python 2 packages ported to Python 3 using ``future`` and ``futurize``?
------------------------------------------------------------------------------------------------
-
+
Yes, an example is the port of ``xlwt``, available `here
`_.
-
+
The code also contains backports for several Py3 standard library
modules under ``future/standard_library/``.
-Relationship between ``future`` and other compatibility tools
-=============================================================
+Relationship between python-future and other compatibility tools
+================================================================
-How does this relate to ``2to3`` and ``lib2to3``?
--------------------------------------------------
+How does this relate to ``2to3``?
+---------------------------------
``2to3`` is a powerful and flexible tool that can produce different
styles of Python 3 code. It is, however, primarily designed for one-way
@@ -166,13 +172,11 @@ most inputs; worse, it allows arbitrary code execution by the user
for specially crafted inputs because of the ``eval()`` executed by Python
2's ``input()`` function.
-This is not an isolated example; almost every output of ``2to3`` will
-need modification to provide backward compatibility with Python 2.
-``future`` is designed for just this purpose.
-
-The ``future`` source tree contains a script called ``futurize`` that is
-based on ``lib2to3``. It is designed to turn either Python 2-only or
-Python 3-only code into code that is compatible with both platforms.
+This is not an isolated example; almost every output of ``2to3`` will need
+modification to provide backward compatibility with Python 2. As an
+alternative, the ``python-future`` project provides a script called
+``futurize`` that is based on ``lib2to3`` but will produce code that is
+compatible with both platforms (Py2 and Py3).
Can I maintain a Python 2 codebase and use 2to3 to automatically convert to Python 3 in the setup script?
@@ -180,12 +184,12 @@ Can I maintain a Python 2 codebase and use 2to3 to automatically convert to Pyth
This was originally the approach recommended by Python's core developers,
but it has some large drawbacks:
-
+
1. First, your actual working codebase will be stuck with Python 2's
warts and smaller feature set for as long as you need to retain Python 2
compatibility. This may be at least 5 years for many projects, possibly
much longer.
-
+
2. Second, this approach carries the significant disadvantage that you
cannot apply patches submitted by Python 3 users against the
auto-generated Python 3 code. (See `this talk
@@ -195,41 +199,44 @@ auto-generated Python 3 code. (See `this talk
What is the relationship between ``future`` and ``six``?
--------------------------------------------------------
-``future`` is a higher-level compatibility layer than ``six`` that
-includes more backported functionality from Python 3 and supports cleaner
-code but requires more modern Python versions to run.
+``python-future`` is a higher-level compatibility layer than ``six`` that
+includes more backported functionality from Python 3, more forward-ported
+functionality from Python 2, and supports cleaner code, but requires more
+modern Python versions to run.
-``future`` and ``six`` share the same goal of making it possible to write
+``python-future`` and ``six`` share the same goal of making it possible to write
a single-source codebase that works on both Python 2 and Python 3.
-``future`` has the further goal of allowing standard Py3 code to run with
+``python-future`` has the further goal of allowing standard Py3 code to run with
almost no modification on both Py3 and Py2. ``future`` provides a more
complete set of support for Python 3's features, including backports of
Python 3 builtins such as the ``bytes`` object (which is very different
to Python 2's ``str`` object) and several standard library modules.
-``future`` supports only Python 2.6+ and Python 3.3+, whereas ``six``
+``python-future`` supports only Python 2.7+ and Python 3.4+, whereas ``six``
supports all versions of Python from 2.4 onwards. (See
:ref:`supported-versions`.) If you must support older Python versions,
-``six`` will be esssential for you. However, beware that maintaining
+``six`` will be essential for you. However, beware that maintaining
single-source compatibility with older Python versions is ugly and `not
fun `_.
-If you can drop support for older Python versions, ``future`` leverages
-some important features introduced into Python 2.6 and 2.7, such as
-import hooks, to allow you to write more idiomatic, maintainable code.
+If you can drop support for older Python versions, ``python-future`` leverages
+some important features introduced into Python 2.7, such as
+import hooks, and a comprehensive and well-tested set of backported
+functionality, to allow you to write more idiomatic, maintainable code with
+fewer compatibility hacks.
-What is the relationship between this project and ``python-modernize``?
------------------------------------------------------------------------
+What is the relationship between ``python-future`` and ``python-modernize``?
+----------------------------------------------------------------------------
``python-future`` contains, in addition to the ``future`` compatibility
package, a ``futurize`` script that is similar to ``python-modernize.py``
in intent and design. Both are based heavily on ``2to3``.
-
+
Whereas ``python-modernize`` converts Py2 code into a common subset of
Python 2 and 3, with ``six`` as a run-time dependency, ``futurize``
converts either Py2 or Py3 code into (almost) standard Python 3 code,
-with ``future`` as a run-time dependency.
+with ``future`` as a run-time dependency.
Because ``future`` provides more backported Py3 behaviours from ``six``,
the code resulting from ``futurize`` is more likely to work
@@ -238,25 +245,21 @@ effort.
Platform and version support
-----------------------------
+============================
.. _supported-versions:
-Which versions of Python does ``future`` support?
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Python 2.6, 2.7, and 3.3+ only.
+Which versions of Python does ``python-future`` support?
+--------------------------------------------------------
-Python 2.6 and 2.7 introduced many important forward-compatibility
-features (such as import hooks, ``b'...'`` literals and ``__future__``
-definitions) that greatly reduce the maintenance burden for single-source
-Py2/3 compatible code. ``future`` leverages these features and aims to
-close the remaining gap between Python 3 and 2.6 / 2.7.
+Python 2.6 and 3.3+ only. Python 2.7 and Python 3.4+ are preferred.
-Python 3.2 could perhaps be supported too, although the illegal unicode
-literal ``u'...'`` syntax may be a drawback. The Py3.2 userbase is
-very small, however. Please let us know if you would like to see Py3.2
-support.
+You may be able to use Python 2.6 but writing Py2/3 compatible code is not as
+easy. Python 2.7 introduced many important forward-compatibility features (such
+as import hooks, ``b'...'`` literals and ``__future__`` definitions) that
+greatly reduce the maintenance burden for single-source Py2/3 compatible code.
+``future`` leverages these features and aims to close the remaining gap between
+Python 3 and 2.7.
Do you support Pypy?
@@ -267,7 +270,7 @@ and pull requests are welcome!
Do you support IronPython and/or Jython?
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Not sure. This would be nice...
@@ -280,8 +283,7 @@ Support
Is there a mailing list?
------------------------
-Yes, please ask any questions on the `python-porting
-`_ mailing list.
+There was a `python-porting` mailing list, but it's now dead.
.. _contributing:
@@ -293,12 +295,13 @@ Can I help?
-----------
Yes please :) We welcome bug reports, additional tests, pull requests,
-and stories of either success or failure with using it. Help with the fixers
-for the ``futurize`` script is particularly welcome.
+and stories of either success or failure with using it.
+
+However, please note that the project is not very actively maintained. It
+should be considered done, like Python 2.
Where is the repo?
------------------
``_.
-
diff --git a/docs/future-builtins.rst b/docs/future-builtins.rst
new file mode 100644
index 00000000..df8ff79d
--- /dev/null
+++ b/docs/future-builtins.rst
@@ -0,0 +1,17 @@
+.. _future-builtins:
+
+``future.builtins``
+===================
+
+The ``future.builtins`` module is also accessible as ``builtins`` on Py2.
+
+- ``pow()`` supports fractional exponents of negative numbers like in Py3::
+
+ >>> from builtins import pow
+ >>> pow(-1, 0.5)
+ (6.123233995736766e-17+1j)
+
+- ``round()`` uses Banker's Rounding as in Py3 to the nearest even last digit::
+
+ >>> from builtins import round
+ >>> assert round(0.1250, 2) == 0.12
diff --git a/docs/futurize.rst b/docs/futurize.rst
new file mode 100644
index 00000000..11520a6c
--- /dev/null
+++ b/docs/futurize.rst
@@ -0,0 +1,314 @@
+.. _forwards-conversion:
+
+``futurize``: Py2 to Py2/3
+--------------------------
+
+.. include:: futurize_overview.rst
+
+
+.. _forwards-conversion-stage1:
+
+Stage 1: "safe" fixes
+~~~~~~~~~~~~~~~~~~~~~
+
+Run the first stage of the conversion process with::
+
+ futurize --stage1 mypackage/*.py
+
+or, if you are using zsh, recursively::
+
+ futurize --stage1 mypackage/**/*.py
+
+This applies fixes that modernize Python 2 code without changing the effect of
+the code. With luck, this will not introduce any bugs into the code, or will at
+least be trivial to fix. The changes are those that bring the Python code
+up-to-date without breaking Py2 compatibility. The resulting code will be
+modern Python 2.7-compatible code plus ``__future__`` imports from the
+following set:
+
+.. code-block:: python
+
+ from __future__ import absolute_import
+ from __future__ import division
+ from __future__ import print_function
+
+Only those ``__future__`` imports deemed necessary will be added unless
+the ``--all-imports`` command-line option is passed to ``futurize``, in
+which case they are all added.
+
+The ``from __future__ import unicode_literals`` declaration is not added
+unless the ``--unicode-literals`` flag is passed to ``futurize``.
+
+The changes include::
+
+ - except MyException, e:
+ + except MyException as e:
+
 - print >>stderr, "Blah"
 + from __future__ import print_function
 + print("Blah", file=stderr)
+
+ - class MyClass:
+ + class MyClass(object):
+
+ - def next(self):
+ + def __next__(self):
+
+ - if d.has_key(key):
+ + if key in d:
+
+Implicit relative imports fixed, e.g.::
+
+ - import mymodule
+ + from __future__ import absolute_import
+ + from . import mymodule
+
+.. and all unprefixed string literals '...' gain a b prefix to be b'...'.
+
+.. (This last step can be prevented using --no-bytes-literals if you already have b'...' markup in your code, whose meaning would otherwise be lost.)
+
+Stage 1 does not add any imports from the ``future`` package. The output of
+stage 1 will probably not (yet) run on Python 3.
+
+The goal for this stage is to create most of the ``diff`` for the entire
+porting process, but without introducing any bugs. It should be uncontroversial
+and safe to apply to every Python 2 package. The subsequent patches introducing
+Python 3 compatibility should then be shorter and easier to review.
+
+The complete set of fixers applied by ``futurize --stage1`` is:
+
+.. code-block:: python
+
+ lib2to3.fixes.fix_apply
+ lib2to3.fixes.fix_except
+ lib2to3.fixes.fix_exec
+ lib2to3.fixes.fix_exitfunc
+ lib2to3.fixes.fix_funcattrs
+ lib2to3.fixes.fix_has_key
+ lib2to3.fixes.fix_idioms
+ lib2to3.fixes.fix_intern
+ lib2to3.fixes.fix_isinstance
+ lib2to3.fixes.fix_methodattrs
+ lib2to3.fixes.fix_ne
+ lib2to3.fixes.fix_numliterals
+ lib2to3.fixes.fix_paren
+ lib2to3.fixes.fix_reduce
+ lib2to3.fixes.fix_renames
+ lib2to3.fixes.fix_repr
+ lib2to3.fixes.fix_standarderror
+ lib2to3.fixes.fix_sys_exc
+ lib2to3.fixes.fix_throw
+ lib2to3.fixes.fix_tuple_params
+ lib2to3.fixes.fix_types
+ lib2to3.fixes.fix_ws_comma
+ lib2to3.fixes.fix_xreadlines
+ libfuturize.fixes.fix_absolute_import
+ libfuturize.fixes.fix_next_call
+ libfuturize.fixes.fix_print_with_import
+ libfuturize.fixes.fix_raise
+
+The following fixers from ``lib2to3`` are not applied:
+
+.. code-block:: python
+
+ lib2to3.fixes.fix_import
+
+The ``fix_absolute_import`` fixer in ``libfuturize.fixes`` is applied instead of
+``lib2to3.fixes.fix_import``. The new fixer both makes implicit relative
+imports explicit and adds the declaration ``from __future__ import
+absolute_import`` at the top of each relevant module.
+
+.. code-block:: python
+
+ lib2to3.fixes.fix_next
+
+The ``fix_next_call`` fixer in ``libfuturize.fixes`` is applied instead of
+``fix_next`` in stage 1. The new fixer changes any ``obj.next()`` calls to
+``next(obj)``, which is Py2/3 compatible, but doesn't change any ``next`` method
+names to ``__next__``, which would break Py2 compatibility.
+
+``fix_next`` is applied in stage 2.
+
+.. code-block:: python
+
+ lib2to3.fixes.fix_print
+
+The ``fix_print_with_import`` fixer in ``libfuturize.fixes`` changes the code to
+use print as a function and also adds ``from __future__ import
+print_function`` to the top of modules using ``print()``.
+
+In addition, it avoids adding an extra set of parentheses if these already
+exist. So ``print(x)`` does not become ``print((x))``.
+
+.. code-block:: python
+
+ lib2to3.fixes.fix_raise
+
+This fixer translates code to use the Python 3-only ``with_traceback()``
+method on exceptions.
+
+.. code-block:: python
+
+ lib2to3.fixes.fix_set_literal
+
+This converts ``set([1, 2, 3])`` to ``{1, 2, 3}``.
+
+.. code-block:: python
+
+ lib2to3.fixes.fix_ws_comma
+
+This performs cosmetic changes. This is not applied by default because it
+does not serve to improve Python 2/3 compatibility. (In some cases it may
+also reduce readability: see issue #58.)
+
+
+
+.. _forwards-conversion-stage2:
+
+Stage 2: Py3-style code with wrappers for Py2
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Run stage 2 of the conversion process with::
+
+ futurize --stage2 myfolder/*.py
+
+This stage adds a dependency on the ``future`` package. The goal for stage 2 is
+to make further mostly safe changes to the Python 2 code to use Python 3-style
+code that then still runs on Python 2 with the help of the appropriate builtins
+and utilities in ``future``.
+
+For example::
+
+ name = raw_input('What is your name?\n')
+
+ for k, v in d.iteritems():
+ assert isinstance(v, basestring)
+
+ class MyClass(object):
+ def __unicode__(self):
+ return u'My object'
+ def __str__(self):
+ return unicode(self).encode('utf-8')
+
+would be converted by Stage 2 to this code::
+
+ from builtins import input
+ from builtins import str
+ from future.utils import iteritems, python_2_unicode_compatible
+
+ name = input('What is your name?\n')
+
+ for k, v in iteritems(d):
+ assert isinstance(v, (str, bytes))
+
+ @python_2_unicode_compatible
+ class MyClass(object):
+ def __str__(self):
+ return u'My object'
+
+Stage 2 also renames standard-library imports to their Py3 names and adds these
+two lines::
+
+ from future import standard_library
+ standard_library.install_aliases()
+
+For example::
+
+ import ConfigParser
+
+becomes::
+
+ from future import standard_library
+ standard_library.install_aliases()
+ import configparser
+
+The complete list of fixers applied in Stage 2 is::
+
+ lib2to3.fixes.fix_dict
+ lib2to3.fixes.fix_filter
+ lib2to3.fixes.fix_getcwdu
+ lib2to3.fixes.fix_input
+ lib2to3.fixes.fix_itertools
+ lib2to3.fixes.fix_itertools_imports
+ lib2to3.fixes.fix_long
+ lib2to3.fixes.fix_map
+ lib2to3.fixes.fix_next
+ lib2to3.fixes.fix_nonzero
+ lib2to3.fixes.fix_operator
+ lib2to3.fixes.fix_raw_input
+ lib2to3.fixes.fix_zip
+
+ libfuturize.fixes.fix_basestring
+ libfuturize.fixes.fix_cmp
+ libfuturize.fixes.fix_division_safe
+ libfuturize.fixes.fix_execfile
+ libfuturize.fixes.fix_future_builtins
+ libfuturize.fixes.fix_future_standard_library
+ libfuturize.fixes.fix_future_standard_library_urllib
+ libfuturize.fixes.fix_metaclass
+ libpasteurize.fixes.fix_newstyle
+ libfuturize.fixes.fix_object
+ libfuturize.fixes.fix_unicode_keep_u
+ libfuturize.fixes.fix_xrange_with_import
+
+
+Not applied::
+
+ lib2to3.fixes.fix_buffer # Perhaps not safe. Test this.
+ lib2to3.fixes.fix_callable # Not needed in Py3.2+
+ lib2to3.fixes.fix_execfile # Some problems: see issue #37.
+ # We use the custom libfuturize.fixes.fix_execfile instead.
+ lib2to3.fixes.fix_future # Removing __future__ imports is bad for Py2 compatibility!
+ lib2to3.fixes.fix_imports # Called by libfuturize.fixes.fix_future_standard_library
+ lib2to3.fixes.fix_imports2 # We don't handle this yet (dbm)
+ lib2to3.fixes.fix_metaclass # Causes SyntaxError in Py2! Use the one from ``six`` instead
+ lib2to3.fixes.fix_unicode # Strips off the u'' prefix, which removes a potentially
+ # helpful source of information for disambiguating
+ # unicode/byte strings.
+ lib2to3.fixes.fix_urllib # Included in libfuturize.fix_future_standard_library_urllib
+ lib2to3.fixes.fix_xrange # Custom one because of a bug with Py3.3's lib2to3
+
+
+
+.. Ideally the output of this stage should not be a ``SyntaxError`` on either
+.. Python 3 or Python 2.
+
+.. _forwards-conversion-text:
+
+Separating text from bytes
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+After applying stage 2, the recommended step is to decide which of your Python
+2 strings represent text and which represent binary data and to prefix all
+string literals with either ``b`` or ``u`` accordingly. Furthermore, to ensure
+that these types behave similarly on Python 2 as on Python 3, also wrap
+byte-strings or text in the ``bytes`` and ``str`` types from ``future``. For
+example::
+
+ from builtins import bytes, str
+ b = bytes(b'\x00ABCD')
+ s = str(u'This is normal text')
+
+Any unadorned string literals will then represent native platform strings
+(byte-strings on Py2, unicode strings on Py3).
+
+An alternative is to pass the ``--unicode-literals`` flag::
+
+ $ futurize --unicode-literals mypython2script.py
+
+After running this, all string literals that were not explicitly marked up as
+``b''`` will mean text (Python 3 ``str`` or Python 2 ``unicode``).
+
+
+
+.. _forwards-conversion-stage3:
+
+Post-conversion
+~~~~~~~~~~~~~~~
+
+After running ``futurize``, we recommend first running your tests on Python 3 and making further code changes until they pass on Python 3.
+
+The next step would be manually tweaking the code to re-enable Python 2
+compatibility with the help of the ``future`` package. For example, you can add
+the ``@python_2_unicode_compatible`` decorator to any classes that define custom
+``__str__`` methods. See :ref:`what-else` for more info.
diff --git a/docs/futurize_cheatsheet.rst b/docs/futurize_cheatsheet.rst
new file mode 100644
index 00000000..82f211c6
--- /dev/null
+++ b/docs/futurize_cheatsheet.rst
@@ -0,0 +1,124 @@
+.. _futurize_cheatsheet:
+
+``futurize`` quick-start guide
+------------------------------
+
+How to convert Py2 code to Py2/3 code using ``futurize``:
+
+.. _porting-setup:
+
+Step 0: setup
+~~~~~~~~~~~~~
+
+Step 0 goal: set up and see the tests passing on Python 2 and failing on Python 3.
+
+a. Clone the package from github/bitbucket. Optionally rename your repo to ``package-future``. Examples: ``reportlab-future``, ``paramiko-future``, ``mezzanine-future``.
+b. Create and activate a Python 2 conda environment or virtualenv. Install the package with ``python setup.py install`` and run its test suite on Py2.7 (e.g. ``python setup.py test`` or ``py.test``)
+c. Optionally: if there is a ``.travis.yml`` file, add Python version 3.6 and remove any versions < 2.6.
+d. Install Python 3 with e.g. ``sudo apt-get install python3``. On other platforms, an easy way is to use `Miniconda `_. Then e.g.::
+
+ conda create -n py36 python=3.6 pip
+
+.. _porting-step1:
+
+Step 1: modern Py2 code
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The goal for this step is to modernize the Python 2 code without introducing any dependencies (on ``future`` or e.g. ``six``) at this stage.
+
+**1a**. Install ``future`` into the virtualenv using::
+
+ pip install future
+
+**1b**. Run ``futurize --stage1 -w *.py subdir1/*.py subdir2/*.py``. Note that with
+recursive globbing in ``bash`` or ``zsh``, you can apply stage 1 to all source files
+recursively with::
+
+ futurize --stage1 -w .
+
+**1c**. Commit all changes
+
+**1d**. Re-run the test suite on Py2 and fix any errors.
+
+See :ref:`forwards-conversion-stage1` for more info.
+
+
+Example error
+*************
+
+One relatively common error after conversion is::
+
+ Traceback (most recent call last):
+ ...
+ File "/home/user/Install/BleedingEdge/reportlab/tests/test_encrypt.py", line 19, in
+ from .test_pdfencryption import parsedoc
+ ValueError: Attempted relative import in non-package
+
+If you get this error, try adding an empty ``__init__.py`` file in the package
+directory. (In this example, in the tests/ directory.) If this doesn’t help,
+and if this message appears for all tests, they must be invoked differently
+(from the cmd line or e.g. ``setup.py``). The way to run a module inside a
+package on Python 3, or on Python 2 with ``absolute_import`` in effect, is::
+
+ python -m tests.test_platypus_xref
+
+(For more info, see `PEP 328 `_ and
+the `PEP 8 `_ section on absolute
+imports.)
+
+
+.. _porting-step2:
+
+Step 2: working Py3 code that still supports Py2
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The goal for this step is to get the tests passing first on Py3 and then on Py2
+again with the help of the ``future`` package.
+
+**2a**. Run::
+
+ futurize --stage2 myfolder1/*.py myfolder2/*.py
+
+You can view the stage 2 changes to all Python source files recursively with::
+
+ futurize --stage2 .
+
+To apply the changes, add the ``-w`` argument.
+
+This stage makes further conversions needed to support both Python 2 and 3.
+These will likely require imports from ``future`` on Py2 (and sometimes on Py3),
+such as::
+
+ from future import standard_library
+ standard_library.install_aliases()
+ # ...
+ from builtins import bytes
+ from builtins import open
+ from future.utils import with_metaclass
+
+Optionally, you can use the ``--unicode-literals`` flag to add this import to
+the top of each module::
+
+ from __future__ import unicode_literals
+
+All strings in the module would then be unicode on Py2 (as on Py3) unless
+explicitly marked with a ``b''`` prefix.
+
+If you would like ``futurize`` to import all the changed builtins to have their
+Python 3 semantics on Python 2, invoke it like this::
+
+ futurize --stage2 --all-imports myfolder/*.py
+
+
+**2b**. Re-run your tests on Py3 now. Make changes until your tests pass on Python 3.
+
+**2c**. Commit your changes! :)
+
+**2d**. Now run your tests on Python 2 and notice the errors. Add wrappers from
+``future`` to re-enable Python 2 compatibility. See the
+:ref:`compatible-idioms` cheat sheet and :ref:`what-else` for more info.
+
+After each change, re-run the tests on Py3 and Py2 to ensure they pass on both.
+
+**2e**. You're done! Celebrate! Push your code and announce to the world! Hashtags
+#python3 #python-future.
diff --git a/docs/futurize_overview.rst b/docs/futurize_overview.rst
new file mode 100644
index 00000000..769b65c7
--- /dev/null
+++ b/docs/futurize_overview.rst
@@ -0,0 +1,55 @@
+The ``futurize`` script passes Python 2 code through all the appropriate fixers
+to turn it into valid Python 3 code, and then adds ``__future__`` and
+``future`` package imports to re-enable compatibility with Python 2.
+
+For example, running ``futurize`` turns this Python 2 code:
+
+.. code-block:: python
+
+ import ConfigParser # Py2 module name
+
+ class Upper(object):
+ def __init__(self, iterable):
+ self._iter = iter(iterable)
+ def next(self): # Py2-style iterator interface
+ return next(self._iter).upper()
+ def __iter__(self):
+ return self
+
+ itr = Upper('hello')
+ print next(itr),
+ for letter in itr:
+ print letter, # Py2-style print statement
+
+into this code which runs on both Py2 and Py3:
+
+.. code-block:: python
+
+ from __future__ import print_function
+ from future import standard_library
+ standard_library.install_aliases()
+ from future.builtins import next
+ from future.builtins import object
+ import configparser # Py3-style import
+
+ class Upper(object):
+ def __init__(self, iterable):
+ self._iter = iter(iterable)
+ def __next__(self): # Py3-style iterator interface
+ return next(self._iter).upper()
+ def __iter__(self):
+ return self
+
+ itr = Upper('hello')
+ print(next(itr), end=' ') # Py3-style print function
+ for letter in itr:
+ print(letter, end=' ')
+
+
+To write out all the changes to your Python files that ``futurize`` suggests,
+use the ``-w`` flag.
+
+For complex projects, it is probably best to divide the porting into two stages.
+Stage 1 is for "safe" changes that modernize the code but do not break Python
+2.7 compatibility or introduce a dependency on the ``future`` package. Stage 2
+is to complete the process.
diff --git a/docs/hindsight.rst b/docs/hindsight.rst
index a7b283a1..b4654c6a 100644
--- a/docs/hindsight.rst
+++ b/docs/hindsight.rst
@@ -1,4 +1,3 @@
In a perfect world, the new metaclass syntax should ideally be available in
Python 2 as a `__future__`` import like ``from __future__ import
new_metaclass_syntax``.
-
diff --git a/docs/imports.rst b/docs/imports.rst
index 5800bea4..f7dcd9fc 100644
--- a/docs/imports.rst
+++ b/docs/imports.rst
@@ -3,10 +3,10 @@
Imports
=======
-.. ___future__-imports:
+.. _-__future__-imports:
__future__ imports
-~~~~~~~~~~~~~~~~~~
+------------------
To write a Python 2/3 compatible codebase, the first step is to add this line
to the top of each module::
@@ -24,28 +24,29 @@ standard feature of Python, see the following docs:
- print_function: `PEP 3105: Make print a function `_
- unicode_literals: `PEP 3112: Bytes literals in Python 3000 `_
-These are all available in Python 2.6 and up, and enabled by default in Python 3.x.
+These are all available in Python 2.7 and up, and enabled by default in Python 3.x.
-.. _star-imports:
+.. _builtins-imports:
+
+Imports of builtins
+-------------------
-Star imports
-~~~~~~~~~~~~
+.. _star-imports:
-If you don't mind namespace pollution on Python 2, the easiest way to provide
-Py2/3 compatibility for new code using ``future`` is to include the following
-imports at the top of every module::
+Implicit imports
+~~~~~~~~~~~~~~~~
- from future.builtins import *
+If you don't mind namespace pollution, the easiest way to provide Py2/3
+compatibility for new code using ``future`` is to include the following imports
+at the top of every module::
-together with these module imports when necessary::
-
- from future import standard_library, utils
+ from builtins import *
-On Python 3, ``from future.builtins import *`` line has zero effect and zero
-namespace pollution.
+On Python 3, this has no effect. (It shadows builtins with globals of the same
+names.)
-On Python 2, this import line shadows 16 builtins (listed below) to
+On Python 2, this import line shadows 18 builtins (listed below) to
provide their Python 3 semantics.
@@ -55,40 +56,45 @@ Explicit imports
~~~~~~~~~~~~~~~~
Explicit forms of the imports are often preferred and are necessary for using
-some automated code-analysis tools.
+certain automated code-analysis tools.
-The most common imports from ``future`` are::
-
- from future import standard_library, utils
- from future.builtins import (bytes, int, range, round, str, super,
- ascii, chr, hex, input, oct, open,
- filter, map, zip)
+The complete set of imports of builtins from ``future`` is::
-The disadvantage of importing only some of the builtins is that it
-increases the risk of introducing Py2/3 portability bugs as your code
-evolves over time. Be especially aware of not importing ``input``, which could
-expose a security vulnerability on Python 2 if Python 3's semantics are
-expected.
+ from builtins import (ascii, bytes, chr, dict, filter, hex, input,
+ int, map, next, oct, open, pow, range, round,
+ str, super, zip)
-One further technical distinction is that unlike the ``import *`` form above,
-these explicit imports do actually change ``locals()``; this is equivalent
-to typing ``bytes = bytes; int = int`` etc. for each builtin.
+These are also available under the ``future.builtins`` namespace for backward compatibility.
+
+Importing only some of the builtins is cleaner but increases the risk of
+introducing Py2/3 portability bugs as your code evolves over time. For example,
+be aware of forgetting to import ``input``, which could expose a security
+vulnerability on Python 2 if Python 3's semantics are expected.
+
+.. One further technical distinction is that unlike the ``import *`` form above,
+.. these explicit imports do actually modify ``locals()`` on Py3; this is
+.. equivalent to typing ``bytes = bytes; int = int`` etc. for each builtin.
The internal API is currently as follows::
- from future.builtins.backports import bytes, int, range, round, str, super
- from future.builtins.misc import ascii, chr, hex, input, oct, open
+ from future.types import bytes, dict, int, range, str
+ from future.builtins.misc import (ascii, chr, hex, input, next,
+ oct, open, pow, round, super)
from future.builtins.iterators import filter, map, zip
-To understand the details of the backported builtins on Python 2, see the
-docs for these modules. Please note that this internal API is evolving and may
-not be stable between different versions of ``future``.
+Please note that this internal API is evolving and may not be stable between
+different versions of ``future``. To understand the details of the backported
+builtins on Python 2, see the docs for these modules.
+
+For more information on what the backported types provide, see :ref:`what-else`.
+
+.. < Section about past.translation is included here >
.. _obsolete-builtins:
Obsolete Python 2 builtins
-~~~~~~~~~~~~~~~~~~~~~~~~~~
+__________________________
Twelve Python 2 builtins have been removed from Python 3. To aid with
porting code to Python 3 module by module, you can use the following
@@ -109,202 +115,12 @@ equivalent Python 3 forms and then adds ``future`` imports to resurrect
Python 2 support, as described in :ref:`forwards-conversion-stage2`.
-.. _unicode-literals:
-
-Should I import unicode_literals?
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The ``future`` package can be used with or without ``unicode_literals``
-imports.
-
-There is some contention in the community about whether it is advisable
-to import ``unicode_literals`` from ``__future__`` in a Python 2/3
-compatible codebase.
-
-It is more compelling to use ``unicode_literals`` when back-porting
-new or existing Python 3 code to Python 2/3. For porting existing Python 2
-code to 2/3, explicitly marking up all unicode string literals with ``u''``
-prefixes helps to avoid unintentionally changing an existing Python 2 API.
-
-If you use ``unicode_literals``, testing and debugging your code with
-*Python 3* first is probably the easiest way to fix your code. After this,
-fixing Python 2 support will be easier.
-
-To avoid confusion, we recommend using ``unicode_literals`` everywhere
-across a code-base or not at all, instead of turning on for only some
-modules.
-
-This section summarizes the benefits and drawbacks of using
-``unicode_literals``.
+.. include:: standard_library_imports.rst
-Benefits
---------
-
-1. String literals are unicode on Python 3. Making them unicode on Python 2
- leads to more consistency of your string types the two runtimes. This can
- make it easier to understand and debug your code.
-
-2. Code without ``u''`` prefixes is cleaner, one of the claimed advantages
- of Python 3. Even though some unicode strings would require a function
- call to invert them to native strings for some Python 2 APIs (see
- :ref:`stdlib-incompatibilities`), the incidence of these function calls
- would be much lower than with using ``u''`` prefixes in the absence of
- ``unicode_literals``.
-
-3. The diff for a Python 2 -> 2/3 port may be smaller, less noisy, and
- easier to review with ``unicode_literals`` than if an explicit ``u''``
- prefix is added to every unadorned string literal.
-
-4. If support for Python 3.2 is required (e.g. for Ubuntu 12.04 LTS or
- Debian wheezy), ``u''`` prefixes are a ``SyntaxError``, making
- ``unicode_literals`` the only option for a Python 2/3 compatible
- codebase.
-
-
-Drawbacks
----------
-
-1. Adding ``unicode_literals`` to a module amounts to a "global flag day" for
- that module, changing the data types of all strings in the module at once.
- Cautious developers may prefer an incremental approach. (See
- `here `_ for an excellent article
- describing the superiority of an incremental patch-set in the the case
- of the Linux kernel.)
-
-.. This is a larger-scale change than adding explicit ``u''`` prefixes to
-.. all strings that should be Unicode.
-
-2. Changing to ``unicode_literals`` will likely introduce regressions on
- Python 2 that require an initial investment of time to find and fix. The
- APIs may be changed in subtle ways that are not immediately obvious.
-
- An example on Python 2::
-
- ### Module: mypaths.py
-
- ...
- def unix_style_path(path):
- return path.replace('\\', '/')
- ...
-
- ### User code:
-
- >>> path1 = '\\Users\\Ed'
- >>> unix_style_path(path1)
- u'/Users/ed'
-
- On Python 2, adding a ``unicode_literals`` import to ``mypaths.py`` would
- change the return type of the ``unix_style_path`` function from ``str`` to
- ``unicode``, which is difficult to anticipate and probably unintended.
-
- The counterargument is that this code is broken, in a portability
- sense; we see this from Python 3 raising a ``TypeError`` upon passing the
- function a byte-string. The code needs to be changed to make explicit
- whether the ``path`` argument is to be a byte string or a unicode string.
-
-3. With ``unicode_literals`` in effect, there is no way to specify a native
- string literal (``str`` type on both platforms). This can be worked around as follows::
-
- >>> from __future__ import unicode_literals
- >>> ...
- >>> from future.utils import bytes_to_native_str as n
-
- >>> s = n(b'ABCD')
- >>> s
- 'ABCD' # on both Py2 and Py3
-
- although this incurs a performance penalty (a function call and, on Py3,
- a ``decode`` method call.)
-
- This is a little awkward because various Python library APIs (standard
- and non-standard) require a native string to be passed on both Py2
- and Py3. (See :ref:`stdlib-incompatibilities` for some examples. WSGI
- dictionaries are another.)
-
-3. If a codebase already explicitly marks up all text with ``u''`` prefixes,
- and if support for Python versions 3.0-3.2 can be dropped, then
- removing the existing ``u''`` prefixes and replacing these with
- ``unicode_literals`` imports (the porting approach Django used) would
- introduce more noise into the patch and make it more difficult to review.
- However, note that the ``futurize`` script takes advantage of PEP 414 and
- does not remove explicit ``u''`` prefixes that already exist.
-
-4. Turning on ``unicode_literals`` converts even docstrings to unicode, but
- Pydoc breaks with unicode docstrings containing non-ASCII characters for
- Python versions < 2.7.7. (Fix committed in Jan 2014.)::
-
- >>> def f():
- ... u"Author: Martin von Löwis"
-
- >>> help(f)
-
- /Users/schofield/Install/anaconda/python.app/Contents/lib/python2.7/pydoc.pyc in pipepager(text, cmd)
- 1376 pipe = os.popen(cmd, 'w')
- 1377 try:
- -> 1378 pipe.write(text)
- 1379 pipe.close()
- 1380 except IOError:
-
- UnicodeEncodeError: 'ascii' codec can't encode character u'\xf6' in position 71: ordinal not in range(128)
-
-See `this Stack Overflow thread
-`_
-for other gotchas.
-
-
-Others' perspectives
---------------------
-
-In favour of ``unicode_literals``
-*********************************
-
-The following `quote `_ is from Aymeric Augustin on 23 August 2012 regarding
-why he chose ``unicode_literals`` for the port of Django to a Python
-2/3-compatible codebase.:
-
- "... I'd like to explain why this PEP [PEP 414, which allows explicit
- ``u''`` prefixes for unicode literals on Python 3.3+] is at odds with
- the porting philosophy I've applied to Django, and why I would have
- vetoed taking advantage of it.
-
- "I believe that aiming for a Python 2 codebase with Python 3
- compatibility hacks is a counter-productive way to port a project. You
- end up with all the drawbacks of Python 2 (including the legacy `u`
- prefixes) and none of the advantages Python 3 (especially the sane
- string handling).
-
- "Working to write Python 3 code, with legacy compatibility for Python
- 2, is much more rewarding. Of course it takes more effort, but the
- results are much cleaner and much more maintainable. It's really about
- looking towards the future or towards the past.
-
- "I understand the reasons why PEP 414 was proposed and why it was
- accepted. It makes sense for legacy software that is minimally
- maintained. I hope nobody puts Django in this category!"
-
-
-Against ``unicode_literals``
-****************************
-
- "There are so many subtle problems that ``unicode_literals`` causes.
- For instance lots of people accidentally introduce unicode into
- filenames and that seems to work, until they are using it on a system
- where there are unicode characters in the filesystem path."
-
- -- Armin Ronacher
-
- "+1 from me for avoiding the unicode_literals future, as it can have
- very strange side effects in Python 2.... This is one of the key
- reasons I backed Armin's PEP 414."
-
- -- Nick Coghlan
-
- "Yeah, one of the nuisances of the WSGI spec is that the header values
- IIRC are the str or StringType on both py2 and py3. With
- unicode_literals this causes hard-to-spot bugs, as some WSGI servers
- might be more tolerant than others, but usually using unicode in python
- 2 for WSGI headers will cause the response to fail."
-
- -- Antti Haapala
+.. include:: translation.rst
+.. include:: unicode_literals.rst
+Next steps
+----------
+See :ref:`what-else`.
diff --git a/docs/index.rst b/docs/index.rst
index 9f8f6e2e..cc84c9b7 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,10 +1,9 @@
-future: clean single-source support for Python 2/3
-======================================================
+Easy, clean, reliable Python 2/3 compatibility
+==============================================
-``future`` is the missing compatibility layer between Python 2 and Python
-3. It allows you to use a single, clean Python 3.x-compatible
+``python-future`` is the missing compatibility layer between Python 2 and
+Python 3. It allows you to use a single, clean Python 3.x-compatible
codebase to support both Python 2 and Python 3 with minimal overhead.
.. include:: contents.rst.inc
-
diff --git a/docs/int_object.rst b/docs/int_object.rst
index e0f8a8bd..f774784b 100644
--- a/docs/int_object.rst
+++ b/docs/int_object.rst
@@ -23,7 +23,7 @@ is a subclass of Python 2's ``long`` with the same representation
behaviour as Python 3's ``int``. To ensure an integer is long compatibly with
both Py3 and Py2, cast it like this::
- >>> from future.builtins import int
+ >>> from builtins import int
>>> must_be_a_long_integer = int(1234)
The backported ``int`` object helps with writing doctests and simplifies code
@@ -31,6 +31,8 @@ that deals with ``long`` and ``int`` as special cases on Py2. An example is the
following code from ``xlwt-future`` (called by the ``xlwt.antlr.BitSet`` class)
for writing out Excel ``.xls`` spreadsheets. With ``future``, the code is::
+ from builtins import int
+
def longify(data):
"""
Turns data (an int or long, or a list of ints or longs) into a
@@ -64,9 +66,3 @@ Without ``future`` (or with ``future`` < 0.7), this might be::
return list(map(int, data)) # same as returning data, but with up-front typechecking
else:
return list(map(long, data))
-
-
-Note that ``future.builtins`` defines :func:`isinstance` specially to handle
-Python 2's short integers as well as the backported Py3-like ``int``. See
-:ref:`isinstance-calls`.
-
diff --git a/docs/isinstance.rst b/docs/isinstance.rst
index ce3cee64..2bb5084a 100644
--- a/docs/isinstance.rst
+++ b/docs/isinstance.rst
@@ -4,7 +4,7 @@ isinstance
----------
The following tests all pass on Python 3::
-
+
>>> assert isinstance(2**62, int)
>>> assert isinstance(2**63, int)
>>> assert isinstance(b'my byte-string', bytes)
@@ -39,7 +39,7 @@ then the fifth test fails too::
After importing the builtins from ``future``, all these tests pass on
Python 2 as on Python 3::
- >>> from future.builtins import bytes, int, str
+ >>> from builtins import bytes, int, str
>>> assert isinstance(10, int)
>>> assert isinstance(10**100, int)
@@ -52,7 +52,7 @@ However, note that the last test requires that ``unicode_literals`` be imported
>>> assert isinstance('unicode string 2', str)
This works because the backported types ``int``, ``bytes`` and ``str``
-have metaclasses that override ``__instancecheck__``. See `PEP 3119
+(and others) have metaclasses that override ``__instancecheck__``. See `PEP 3119
 <https://www.python.org/dev/peps/pep-3119/>`_
for details.
@@ -60,8 +60,8 @@ for details.
Passing data to/from Python 2 libraries
---------------------------------------
-If you are passing any of the backported types (``bytes``, ``str``,
-``int``) into brittle library code that performs type-checks using ``type()``,
+If you are passing any of the backported types (``bytes``, ``int``, ``dict``,
+``str``) into brittle library code that performs type-checks using ``type()``,
rather than ``isinstance()``, or requires that you pass Python 2's native types
(rather than subclasses) for some other reason, it may be necessary to upcast
the types from ``future`` to their native superclasses on Py2.
@@ -69,30 +69,30 @@ the types from ``future`` to their native superclasses on Py2.
The ``native`` function in ``future.utils`` is provided for this. Here is how
to use it. (The output showing is from Py2)::
- >>> from future.builtins import *
+ >>> from builtins import int, bytes, str
>>> from future.utils import native
>>> a = int(10**20) # Py3-like long int
>>> a
100000000000000000000
>>> type(a)
- future.builtins.backports.newint.newint
+ future.types.newint.newint
>>> native(a)
100000000000000000000L
>>> type(native(a))
long
-
+
>>> b = bytes(b'ABC')
>>> type(b)
- future.builtins.backports.newbytes.newbytes
+ future.types.newbytes.newbytes
>>> native(b)
'ABC'
>>> type(native(b))
str
-
+
>>> s = str(u'ABC')
>>> type(s)
- future.builtins.backports.newstr.newstr
+ future.types.newstr.newstr
>>> native(s)
u'ABC'
>>> type(native(s))
@@ -115,4 +115,3 @@ The objects ``native_str`` and ``native_bytes`` are available in
The functions ``native_str_to_bytes`` and ``bytes_to_native_str`` are also
available for more explicit conversions.
-
diff --git a/docs/limitations.rst b/docs/limitations.rst
index c822a27f..0d13805d 100644
--- a/docs/limitations.rst
+++ b/docs/limitations.rst
@@ -1,4 +1,3 @@
-
limitations of the ``future`` module and differences between Py2 and Py3 that are not (yet) handled
===================================================================================================
@@ -39,7 +38,7 @@ Also:
b'\x00'[0] != 0
b'\x01'[0] != 1
-
+
``futurize`` does not yet wrap all byte-string literals in a ``bytes()``
call. This is on the to-do list. See :ref:`bytes-object` for more information.
@@ -47,9 +46,7 @@ Also:
Notes
-----
- Ensure you are using new-style classes on Py2. Py3 doesn't require
- inheritance from ``object`` for this, but Py2 does. ``futurize
- --from3`` adds this back in automatically, but ensure you do this too
+ inheritance from ``object`` for this, but Py2 does. ``pasteurize``
+ adds this back in automatically, but ensure you do this too
when writing your classes, otherwise weird breakage when e.g. calling
``super()`` may occur.
-
-
diff --git a/docs/metaclasses.rst b/docs/metaclasses.rst
index c4bcdd00..d40c5a46 100644
--- a/docs/metaclasses.rst
+++ b/docs/metaclasses.rst
@@ -5,16 +5,14 @@ Python 3 and Python 2 syntax for metaclasses are incompatible.
``future`` provides a function (from ``jinja2/_compat.py``) called
:func:`with_metaclass` that can assist with specifying metaclasses
portably across Py3 and Py2. Use it like this::
-
+
from future.utils import with_metaclass
class BaseForm(object):
pass
-
+
class FormType(type):
pass
-
+
class Form(with_metaclass(FormType, BaseForm)):
pass
-
-
diff --git a/docs/notebooks/Writing Python 2-3 compatible code.ipynb b/docs/notebooks/Writing Python 2-3 compatible code.ipynb
new file mode 100644
index 00000000..663ede44
--- /dev/null
+++ b/docs/notebooks/Writing Python 2-3 compatible code.ipynb
@@ -0,0 +1,3167 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Cheat Sheet: Writing Python 2-3 compatible code"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- **Copyright (c):** 2013-2024 Python Charmers, Australia.\n",
+ "- **Author:** Ed Schofield.\n",
+ "- **Licence:** Creative Commons Attribution.\n",
+ "\n",
+ "A PDF version is here: https://python-future.org/compatible_idioms.pdf\n",
+ "\n",
+ "This notebook shows you idioms for writing future-proof code that is compatible with both versions of Python: 2 and 3. It accompanies Ed Schofield's talk at PyCon AU 2014, \"Writing 2/3 compatible code\". (The video is here: .)\n",
+ "\n",
+ "Minimum versions:\n",
+ "\n",
+ " - Python 2: 2.6+\n",
+ " - Python 3: 3.3+"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Setup"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The imports below refer to these ``pip``-installable packages on PyPI:\n",
+ "\n",
+ " import future # pip install future\n",
+ " import builtins # pip install future\n",
+ " import past # pip install future\n",
+ " import six # pip install six\n",
+ "\n",
+ "The following scripts are also ``pip``-installable:\n",
+ "\n",
+ " futurize # pip install future\n",
+ " pasteurize # pip install future\n",
+ "\n",
+ "See https://python-future.org and https://pythonhosted.org/six/ for more information."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Essential syntax differences"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### print"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "print 'Hello'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3:\n",
+ "print('Hello')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To print multiple strings, import ``print_function`` to prevent Py2 from interpreting it as a tuple:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "print 'Hello', 'Guido'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3:\n",
+ "from __future__ import print_function # (at top of module)\n",
+ "\n",
+ "print('Hello', 'Guido')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "print >> sys.stderr, 'Hello'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3:\n",
+ "from __future__ import print_function\n",
+ "\n",
+ "print('Hello', file=sys.stderr)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "print 'Hello',"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3:\n",
+ "from __future__ import print_function\n",
+ "\n",
+ "print('Hello', end='')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Raising exceptions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "raise ValueError, \"dodgy value\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3:\n",
+ "raise ValueError(\"dodgy value\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Raising exceptions with a traceback:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "traceback = sys.exc_info()[2]\n",
+ "raise ValueError, \"dodgy value\", traceback"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 3 only:\n",
+    "raise ValueError(\"dodgy value\").with_traceback(sys.exc_info()[2])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 1\n",
+ "from six import reraise as raise_\n",
+ "# or\n",
+ "from future.utils import raise_\n",
+ "\n",
+ "traceback = sys.exc_info()[2]\n",
+ "raise_(ValueError, \"dodgy value\", traceback)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 2\n",
+ "from future.utils import raise_with_traceback\n",
+ "\n",
+ "raise_with_traceback(ValueError(\"dodgy value\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Exception chaining (PEP 3134):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Setup:\n",
+ "class DatabaseError(Exception):\n",
+ " pass"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 3 only\n",
+ "class FileDatabase:\n",
+ " def __init__(self, filename):\n",
+ " try:\n",
+ " self.file = open(filename)\n",
+ " except IOError as exc:\n",
+ " raise DatabaseError('failed to open') from exc"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3:\n",
+ "from future.utils import raise_from\n",
+ "\n",
+ "class FileDatabase:\n",
+ " def __init__(self, filename):\n",
+ " try:\n",
+ " self.file = open(filename)\n",
+ " except IOError as exc:\n",
+ " raise_from(DatabaseError('failed to open'), exc)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Testing the above:\n",
+ "try:\n",
+ " fd = FileDatabase('non_existent_file.txt')\n",
+ "except Exception as e:\n",
+ " assert isinstance(e.__cause__, IOError) # FileNotFoundError on Py3.3+ inherits from IOError"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Catching exceptions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "try:\n",
+ " ...\n",
+ "except ValueError, e:\n",
+ " ..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3:\n",
+ "try:\n",
+ " ...\n",
+ "except ValueError as e:\n",
+ " ..."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Division"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Integer division (rounding down):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "assert 2 / 3 == 0"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3:\n",
+ "assert 2 // 3 == 0"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\"True division\" (float division):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 3 only:\n",
+ "assert 3 / 2 == 1.5"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3:\n",
+ "from __future__ import division # (at top of module)\n",
+ "\n",
+ "assert 3 / 2 == 1.5"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\"Old division\" (i.e. compatible with Py2 behaviour):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "a = b / c # with any types"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3:\n",
+ "from past.utils import old_div\n",
+ "\n",
+ "a = old_div(b, c) # always same as / on Py2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Long integers"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Short integers are gone in Python 3 and ``long`` has become ``int`` (without the trailing ``L`` in the ``repr``)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only\n",
+ "k = 9223372036854775808L\n",
+ "\n",
+ "# Python 2 and 3:\n",
+ "k = 9223372036854775808"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only\n",
+ "bigint = 1L\n",
+ "\n",
+ "# Python 2 and 3\n",
+ "from builtins import int\n",
+ "bigint = int(1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To test whether a value is an integer (of any kind):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "if isinstance(x, (int, long)):\n",
+ " ...\n",
+ "\n",
+ "# Python 3 only:\n",
+ "if isinstance(x, int):\n",
+ " ...\n",
+ "\n",
+ "# Python 2 and 3: option 1\n",
+ "from builtins import int # subclass of long on Py2\n",
+ "\n",
+ "if isinstance(x, int): # matches both int and long on Py2\n",
+ " ...\n",
+ "\n",
+ "# Python 2 and 3: option 2\n",
+ "from past.builtins import long\n",
+ "\n",
+ "if isinstance(x, (int, long)):\n",
+ " ..."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Octal constants"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "0644 # Python 2 only"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "0o644 # Python 2 and 3"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Backtick repr"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "`x` # Python 2 only"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "repr(x) # Python 2 and 3"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Metaclasses"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "class BaseForm(object):\n",
+ " pass\n",
+ "\n",
+ "class FormType(type):\n",
+ " pass"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "class Form(BaseForm):\n",
+ " __metaclass__ = FormType\n",
+ " pass"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 3 only:\n",
+ "class Form(BaseForm, metaclass=FormType):\n",
+ " pass"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3:\n",
+ "from six import with_metaclass\n",
+ "# or\n",
+ "from future.utils import with_metaclass\n",
+ "\n",
+ "class Form(with_metaclass(FormType, BaseForm)):\n",
+ " pass"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Strings and bytes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Unicode (text) string literals"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If you are upgrading an existing Python 2 codebase, it may be preferable to mark up all string literals as unicode explicitly with ``u`` prefixes:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only\n",
+ "s1 = 'The Zen of Python'\n",
+ "s2 = u'きたないのよりきれいな方がいい\\n'\n",
+ "\n",
+ "# Python 2 and 3\n",
+ "s1 = u'The Zen of Python'\n",
+ "s2 = u'きたないのよりきれいな方がいい\\n'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The ``futurize`` and ``python-modernize`` tools do not currently offer an option to do this automatically."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If you are writing code for a new project or new codebase, you can use this idiom to make all string literals in a module unicode strings:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3\n",
+ "from __future__ import unicode_literals # at top of module\n",
+ "\n",
+ "s1 = 'The Zen of Python'\n",
+ "s2 = 'きたないのよりきれいな方がいい\\n'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "See https://python-future.org/unicode_literals.html for more discussion on which style to use."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Byte-string literals"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only\n",
+ "s = 'This must be a byte-string'\n",
+ "\n",
+ "# Python 2 and 3\n",
+ "s = b'This must be a byte-string'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To loop over a byte-string with possible high-bit characters, obtaining each character as a byte-string of length 1:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "for bytechar in 'byte-string with high-bit chars like \\xf9':\n",
+ " ...\n",
+ "\n",
+ "# Python 3 only:\n",
+ "for myint in b'byte-string with high-bit chars like \\xf9':\n",
+ " bytechar = bytes([myint])\n",
+ "\n",
+ "# Python 2 and 3:\n",
+ "from builtins import bytes\n",
+ "for myint in bytes(b'byte-string with high-bit chars like \\xf9'):\n",
+ " bytechar = bytes([myint])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As an alternative, ``chr()`` and ``.encode('latin-1')`` can be used to convert an int into a 1-char byte string:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 3 only:\n",
+ "for myint in b'byte-string with high-bit chars like \\xf9':\n",
+ " char = chr(myint) # returns a unicode string\n",
+ " bytechar = char.encode('latin-1')\n",
+ "\n",
+ "# Python 2 and 3:\n",
+ "from builtins import bytes, chr\n",
+ "for myint in bytes(b'byte-string with high-bit chars like \\xf9'):\n",
+ " char = chr(myint) # returns a unicode string\n",
+ " bytechar = char.encode('latin-1') # forces returning a byte str"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### basestring"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "a = u'abc'\n",
+ "b = 'def'\n",
+ "assert (isinstance(a, basestring) and isinstance(b, basestring))\n",
+ "\n",
+ "# Python 2 and 3: alternative 1\n",
+ "from past.builtins import basestring # pip install future\n",
+ "\n",
+ "a = u'abc'\n",
+ "b = b'def'\n",
+ "assert (isinstance(a, basestring) and isinstance(b, basestring))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: alternative 2: refactor the code to avoid considering\n",
+ "# byte-strings as strings.\n",
+ "\n",
+ "from builtins import str\n",
+ "a = u'abc'\n",
+ "b = b'def'\n",
+ "c = b.decode()\n",
+ "assert isinstance(a, str) and isinstance(c, str)\n",
+ "# ..."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### unicode"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "templates = [u\"blog/blog_post_detail_%s.html\" % unicode(slug)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: alternative 1\n",
+ "from builtins import str\n",
+ "templates = [u\"blog/blog_post_detail_%s.html\" % str(slug)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: alternative 2\n",
+ "from builtins import str as text\n",
+ "templates = [u\"blog/blog_post_detail_%s.html\" % text(slug)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### StringIO"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "from StringIO import StringIO\n",
+ "# or:\n",
+ "from cStringIO import StringIO\n",
+ "\n",
+ "# Python 2 and 3:\n",
+ "from io import BytesIO # for handling byte strings\n",
+ "from io import StringIO # for handling unicode strings"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Imports relative to a package"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Suppose the package is:\n",
+ "\n",
+ " mypackage/\n",
+ " __init__.py\n",
+ " submodule1.py\n",
+ " submodule2.py\n",
+ " \n",
+ "and the code below is in ``submodule1.py``:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only: \n",
+ "import submodule2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3:\n",
+ "from . import submodule2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3:\n",
+ "# To make Py2 code safer (more like Py3) by preventing\n",
+ "# implicit relative imports, you can also add this to the top:\n",
+ "from __future__ import absolute_import"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Dictionaries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "heights = {'Fred': 175, 'Anne': 166, 'Joe': 192}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Iterating through ``dict`` keys/values/items"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Iterable dict keys:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "for key in heights.iterkeys():\n",
+ " ..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3:\n",
+ "for key in heights:\n",
+ " ..."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Iterable dict values:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "for value in heights.itervalues():\n",
+ " ..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Idiomatic Python 3\n",
+ "for value in heights.values(): # extra memory overhead on Py2\n",
+ " ..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 1\n",
+ "from builtins import dict\n",
+ "\n",
+ "heights = dict(Fred=175, Anne=166, Joe=192)\n",
+    "for value in heights.values(): # efficient on Py2 and Py3\n",
+ " ..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 2\n",
+ "from future.utils import itervalues\n",
+ "# or\n",
+ "from six import itervalues\n",
+ "\n",
+    "for value in itervalues(heights):\n",
+ " ..."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Iterable dict items:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "for (key, value) in heights.iteritems():\n",
+ " ..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 1\n",
+ "for (key, value) in heights.items(): # inefficient on Py2 \n",
+ " ..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 2\n",
+ "from future.utils import viewitems\n",
+ "\n",
+ "for (key, value) in viewitems(heights): # also behaves like a set\n",
+ " ..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 3\n",
+ "from future.utils import iteritems\n",
+ "# or\n",
+ "from six import iteritems\n",
+ "\n",
+ "for (key, value) in iteritems(heights):\n",
+ " ..."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### dict keys/values/items as a list"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "dict keys as a list:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "keylist = heights.keys()\n",
+ "assert isinstance(keylist, list)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3:\n",
+ "keylist = list(heights)\n",
+ "assert isinstance(keylist, list)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "dict values as a list:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "heights = {'Fred': 175, 'Anne': 166, 'Joe': 192}\n",
+ "valuelist = heights.values()\n",
+ "assert isinstance(valuelist, list)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 1\n",
+ "valuelist = list(heights.values()) # inefficient on Py2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 2\n",
+ "from builtins import dict\n",
+ "\n",
+ "heights = dict(Fred=175, Anne=166, Joe=192)\n",
+ "valuelist = list(heights.values())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 3\n",
+ "from future.utils import listvalues\n",
+ "\n",
+ "valuelist = listvalues(heights)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 4\n",
+ "from future.utils import itervalues\n",
+ "# or\n",
+ "from six import itervalues\n",
+ "\n",
+ "valuelist = list(itervalues(heights))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "dict items as a list:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 1\n",
+ "itemlist = list(heights.items()) # inefficient on Py2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 2\n",
+ "from future.utils import listitems\n",
+ "\n",
+ "itemlist = listitems(heights)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 3\n",
+ "from future.utils import iteritems\n",
+ "# or\n",
+ "from six import iteritems\n",
+ "\n",
+ "itemlist = list(iteritems(heights))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Custom class behaviour"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Custom iterators"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only\n",
+ "class Upper(object):\n",
+ " def __init__(self, iterable):\n",
+ " self._iter = iter(iterable)\n",
+ " def next(self): # Py2-style\n",
+ " return self._iter.next().upper()\n",
+ " def __iter__(self):\n",
+ " return self\n",
+ "\n",
+ "itr = Upper('hello')\n",
+ "assert itr.next() == 'H' # Py2-style\n",
+ "assert list(itr) == list('ELLO')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 1\n",
+ "from builtins import object\n",
+ "\n",
+ "class Upper(object):\n",
+ " def __init__(self, iterable):\n",
+ " self._iter = iter(iterable)\n",
+ " def __next__(self): # Py3-style iterator interface\n",
+ " return next(self._iter).upper() # builtin next() function calls\n",
+ " def __iter__(self):\n",
+ " return self\n",
+ "\n",
+ "itr = Upper('hello')\n",
+ "assert next(itr) == 'H' # compatible style\n",
+ "assert list(itr) == list('ELLO')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 2\n",
+ "from future.utils import implements_iterator\n",
+ "\n",
+ "@implements_iterator\n",
+ "class Upper(object):\n",
+ " def __init__(self, iterable):\n",
+ " self._iter = iter(iterable)\n",
+ " def __next__(self): # Py3-style iterator interface\n",
+ " return next(self._iter).upper() # builtin next() function calls\n",
+ " def __iter__(self):\n",
+ " return self\n",
+ "\n",
+ "itr = Upper('hello')\n",
+ "assert next(itr) == 'H'\n",
+ "assert list(itr) == list('ELLO')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Custom ``__str__`` methods"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "class MyClass(object):\n",
+ " def __unicode__(self):\n",
+ " return 'Unicode string: \\u5b54\\u5b50'\n",
+ " def __str__(self):\n",
+ " return unicode(self).encode('utf-8')\n",
+ "\n",
+ "a = MyClass()\n",
+ "print(a) # prints encoded string"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Unicode string: 孔子\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Python 2 and 3:\n",
+ "from future.utils import python_2_unicode_compatible\n",
+ "\n",
+ "@python_2_unicode_compatible\n",
+ "class MyClass(object):\n",
+ " def __str__(self):\n",
+ " return u'Unicode string: \\u5b54\\u5b50'\n",
+ "\n",
+ "a = MyClass()\n",
+ "print(a) # prints string encoded as utf-8 on Py2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Custom ``__nonzero__`` vs ``__bool__`` method:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "class AllOrNothing(object):\n",
+ " def __init__(self, l):\n",
+ " self.l = l\n",
+ " def __nonzero__(self):\n",
+ " return all(self.l)\n",
+ "\n",
+ "container = AllOrNothing([0, 100, 200])\n",
+ "assert not bool(container)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3:\n",
+ "from builtins import object\n",
+ "\n",
+ "class AllOrNothing(object):\n",
+ " def __init__(self, l):\n",
+ " self.l = l\n",
+ " def __bool__(self):\n",
+ " return all(self.l)\n",
+ "\n",
+ "container = AllOrNothing([0, 100, 200])\n",
+ "assert not bool(container)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Lists versus iterators"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### xrange"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "for i in xrange(10**8):\n",
+ " ..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: forward-compatible\n",
+ "from builtins import range\n",
+ "for i in range(10**8):\n",
+ " ..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: backward-compatible\n",
+ "from past.builtins import xrange\n",
+ "for i in xrange(10**8):\n",
+ " ..."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### range"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only\n",
+ "mylist = range(5)\n",
+ "assert mylist == [0, 1, 2, 3, 4]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: forward-compatible: option 1\n",
+ "mylist = list(range(5)) # copies memory on Py2\n",
+ "assert mylist == [0, 1, 2, 3, 4]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: forward-compatible: option 2\n",
+ "from builtins import range\n",
+ "\n",
+ "mylist = list(range(5))\n",
+ "assert mylist == [0, 1, 2, 3, 4]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 3\n",
+ "from future.utils import lrange\n",
+ "\n",
+ "mylist = lrange(5)\n",
+ "assert mylist == [0, 1, 2, 3, 4]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: backward compatible\n",
+ "from past.builtins import range\n",
+ "\n",
+ "mylist = range(5)\n",
+ "assert mylist == [0, 1, 2, 3, 4]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### map"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "mynewlist = map(f, myoldlist)\n",
+ "assert mynewlist == [f(x) for x in myoldlist]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 1\n",
+ "# Idiomatic Py3, but inefficient on Py2\n",
+ "mynewlist = list(map(f, myoldlist))\n",
+ "assert mynewlist == [f(x) for x in myoldlist]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 2\n",
+ "from builtins import map\n",
+ "\n",
+ "mynewlist = list(map(f, myoldlist))\n",
+ "assert mynewlist == [f(x) for x in myoldlist]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 3\n",
+ "try:\n",
+ " from itertools import imap as map\n",
+ "except ImportError:\n",
+ " pass\n",
+ "\n",
+ "mynewlist = list(map(f, myoldlist))\n",
+ "assert mynewlist == [f(x) for x in myoldlist]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 4\n",
+ "from future.utils import lmap\n",
+ "\n",
+ "mynewlist = lmap(f, myoldlist)\n",
+ "assert mynewlist == [f(x) for x in myoldlist]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 5\n",
+ "from past.builtins import map\n",
+ "\n",
+ "mynewlist = map(f, myoldlist)\n",
+ "assert mynewlist == [f(x) for x in myoldlist]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### imap"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "from itertools import imap\n",
+ "\n",
+ "myiter = imap(func, myoldlist)\n",
+ "assert isinstance(myiter, iter)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 3 only:\n",
+ "myiter = map(func, myoldlist)\n",
+ "assert isinstance(myiter, iter)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 1\n",
+ "from builtins import map\n",
+ "\n",
+ "myiter = map(func, myoldlist)\n",
+ "assert isinstance(myiter, iter)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 2\n",
+ "try:\n",
+ " from itertools import imap as map\n",
+ "except ImportError:\n",
+ " pass\n",
+ "\n",
+ "myiter = map(func, myoldlist)\n",
+ "assert isinstance(myiter, iter)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### zip, izip"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As above with ``zip`` and ``itertools.izip``."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### filter, ifilter"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As above with ``filter`` and ``itertools.ifilter`` too."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Other builtins"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### File IO with open()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only\n",
+ "f = open('myfile.txt')\n",
+ "data = f.read() # as a byte string\n",
+ "text = data.decode('utf-8')\n",
+ "\n",
+ "# Python 2 and 3: alternative 1\n",
+ "from io import open\n",
+ "f = open('myfile.txt', 'rb')\n",
+ "data = f.read() # as bytes\n",
+ "text = data.decode('utf-8') # unicode, not bytes\n",
+ "\n",
+ "# Python 2 and 3: alternative 2\n",
+ "from io import open\n",
+ "f = open('myfile.txt', encoding='utf-8')\n",
+ "text = f.read() # unicode, not bytes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### reduce()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "assert reduce(lambda x, y: x+y, [1, 2, 3, 4, 5]) == 1+2+3+4+5"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3:\n",
+ "from functools import reduce\n",
+ "\n",
+ "assert reduce(lambda x, y: x+y, [1, 2, 3, 4, 5]) == 1+2+3+4+5"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### raw_input()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "name = raw_input('What is your name? ')\n",
+ "assert isinstance(name, str) # native str"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3:\n",
+ "from builtins import input\n",
+ "\n",
+ "name = input('What is your name? ')\n",
+ "assert isinstance(name, str) # native str on Py2 and Py3"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### input()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "input(\"Type something safe please: \")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3\n",
+ "from builtins import input\n",
+ "eval(input(\"Type something safe please: \"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Warning: using either of these is **unsafe** with untrusted input."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### file()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "f = file(pathname)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3:\n",
+ "f = open(pathname)\n",
+ "\n",
+ "# But preferably, use this:\n",
+ "from io import open\n",
+ "f = open(pathname, 'rb') # if f.read() should return bytes\n",
+ "# or\n",
+ "f = open(pathname, 'rt') # if f.read() should return unicode text"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### exec"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "exec 'x = 10'\n",
+ "\n",
+ "# Python 2 and 3:\n",
+ "exec('x = 10')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "g = globals()\n",
+ "exec 'x = 10' in g\n",
+ "\n",
+ "# Python 2 and 3:\n",
+ "g = globals()\n",
+ "exec('x = 10', g)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "l = locals()\n",
+ "exec 'x = 10' in g, l\n",
+ "\n",
+ "# Python 2 and 3:\n",
+ "exec('x = 10', g, l)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "But note that Py3's `exec()` is less powerful (and less dangerous) than Py2's `exec` statement."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### execfile()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "execfile('myfile.py')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: alternative 1\n",
+ "from past.builtins import execfile\n",
+ "\n",
+ "execfile('myfile.py')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: alternative 2\n",
+ "exec(compile(open('myfile.py').read(), 'myfile.py', 'exec'))\n",
+ "\n",
+ "# This can sometimes cause this:\n",
+ "# SyntaxError: function ... uses import * and bare exec ...\n",
+ "# See https://github.com/PythonCharmers/python-future/issues/37"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### unichr()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "assert unichr(8364) == '€'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 3 only:\n",
+ "assert chr(8364) == '€'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3:\n",
+ "from builtins import chr\n",
+ "assert chr(8364) == '€'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### intern()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "intern('mystring')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 3 only:\n",
+ "from sys import intern\n",
+ "intern('mystring')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: alternative 1\n",
+ "from past.builtins import intern\n",
+ "intern('mystring')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: alternative 2\n",
+ "from six.moves import intern\n",
+ "intern('mystring')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: alternative 3\n",
+ "from future.standard_library import install_aliases\n",
+ "install_aliases()\n",
+ "from sys import intern\n",
+ "intern('mystring')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: alternative 2\n",
+ "try:\n",
+ " from sys import intern\n",
+ "except ImportError:\n",
+ " pass\n",
+ "intern('mystring')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### apply()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "args = ('a', 'b')\n",
+ "kwargs = {'kwarg1': True}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "apply(f, args, kwargs)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: alternative 1\n",
+ "f(*args, **kwargs)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: alternative 2\n",
+ "from past.builtins import apply\n",
+ "apply(f, args, kwargs)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### chr()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "assert chr(64) == b'@'\n",
+ "assert chr(200) == b'\\xc8'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 3 only: option 1\n",
+ "assert chr(64).encode('latin-1') == b'@'\n",
+ "assert chr(0xc8).encode('latin-1') == b'\\xc8'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 1\n",
+ "from builtins import chr\n",
+ "\n",
+ "assert chr(64).encode('latin-1') == b'@'\n",
+ "assert chr(0xc8).encode('latin-1') == b'\\xc8'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 3 only: option 2\n",
+ "assert bytes([64]) == b'@'\n",
+ "assert bytes([0xc8]) == b'\\xc8'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: option 2\n",
+ "from builtins import bytes\n",
+ "\n",
+ "assert bytes([64]) == b'@'\n",
+ "assert bytes([0xc8]) == b'\\xc8'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### cmp()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "assert cmp('a', 'b') < 0 and cmp('b', 'a') > 0 and cmp('c', 'c') == 0"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: alternative 1\n",
+ "from past.builtins import cmp\n",
+ "assert cmp('a', 'b') < 0 and cmp('b', 'a') > 0 and cmp('c', 'c') == 0"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: alternative 2\n",
+ "cmp = lambda x, y: (x > y) - (x < y)\n",
+ "assert cmp('a', 'b') < 0 and cmp('b', 'a') > 0 and cmp('c', 'c') == 0"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### reload()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "reload(mymodule)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3\n",
+ "from imp import reload\n",
+ "reload(mymodule)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Standard library"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### dbm modules"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only\n",
+ "import anydbm\n",
+ "import whichdb\n",
+ "import dbm\n",
+ "import dumbdbm\n",
+ "import gdbm\n",
+ "\n",
+ "# Python 2 and 3: alternative 1\n",
+ "from future import standard_library\n",
+ "standard_library.install_aliases()\n",
+ "\n",
+ "import dbm\n",
+ "import dbm.ndbm\n",
+ "import dbm.dumb\n",
+ "import dbm.gnu\n",
+ "\n",
+ "# Python 2 and 3: alternative 2\n",
+ "from future.moves import dbm\n",
+ "from future.moves.dbm import dumb\n",
+ "from future.moves.dbm import ndbm\n",
+ "from future.moves.dbm import gnu\n",
+ "\n",
+ "# Python 2 and 3: alternative 3\n",
+ "from six.moves import dbm_gnu\n",
+ "# (others not supported)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### commands / subprocess modules"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only\n",
+ "from commands import getoutput, getstatusoutput\n",
+ "\n",
+ "# Python 2 and 3\n",
+ "from future import standard_library\n",
+ "standard_library.install_aliases()\n",
+ "\n",
+ "from subprocess import getoutput, getstatusoutput"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### subprocess.check_output()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2.7 and above\n",
+ "from subprocess import check_output\n",
+ "\n",
+ "# Python 2.6 and above: alternative 1\n",
+ "from future.moves.subprocess import check_output\n",
+ "\n",
+ "# Python 2.6 and above: alternative 2\n",
+ "from future import standard_library\n",
+ "standard_library.install_aliases()\n",
+ "\n",
+ "from subprocess import check_output"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### collections: Counter, OrderedDict, ChainMap"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2.7 and above\n",
+ "from collections import Counter, OrderedDict, ChainMap\n",
+ "\n",
+ "# Python 2.6 and above: alternative 1\n",
+ "from future.backports import Counter, OrderedDict, ChainMap\n",
+ "\n",
+ "# Python 2.6 and above: alternative 2\n",
+ "from future import standard_library\n",
+ "standard_library.install_aliases()\n",
+ "\n",
+ "from collections import Counter, OrderedDict, ChainMap"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### StringIO module"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only\n",
+ "from StringIO import StringIO\n",
+ "from cStringIO import StringIO"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3\n",
+ "from io import BytesIO\n",
+ "# and refactor StringIO() calls to BytesIO() if passing byte-strings"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### http module"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "import httplib\n",
+ "import Cookie\n",
+ "import cookielib\n",
+ "import BaseHTTPServer\n",
+ "import SimpleHTTPServer\n",
+ "import CGIHTTPServer\n",
+ "\n",
+ "# Python 2 and 3 (after ``pip install future``):\n",
+ "import http.client\n",
+ "import http.cookies\n",
+ "import http.cookiejar\n",
+ "import http.server"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### xmlrpc module"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "import DocXMLRPCServer\n",
+ "import SimpleXMLRPCServer\n",
+ "\n",
+ "# Python 2 and 3 (after ``pip install future``):\n",
+ "import xmlrpc.server"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "import xmlrpclib\n",
+ "\n",
+ "# Python 2 and 3 (after ``pip install future``):\n",
+ "import xmlrpc.client"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### html escaping and entities"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3:\n",
+ "from cgi import escape\n",
+ "\n",
+ "# Safer (Python 2 and 3, after ``pip install future``):\n",
+ "from html import escape\n",
+ "\n",
+ "# Python 2 only:\n",
+ "from htmlentitydefs import codepoint2name, entitydefs, name2codepoint\n",
+ "\n",
+ "# Python 2 and 3 (after ``pip install future``):\n",
+ "from html.entities import codepoint2name, entitydefs, name2codepoint"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### html parsing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "from HTMLParser import HTMLParser\n",
+ "\n",
+ "# Python 2 and 3 (after ``pip install future``)\n",
+ "from html.parser import HTMLParser\n",
+ "\n",
+ "# Python 2 and 3 (alternative 2):\n",
+ "from future.moves.html.parser import HTMLParser"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### urllib module"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "``urllib`` is the hardest module to use from Python 2/3 compatible code. You may like to use Requests (https://python-requests.org) instead."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "from urlparse import urlparse\n",
+ "from urllib import urlencode\n",
+ "from urllib2 import urlopen, Request, HTTPError"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 3 only:\n",
+ "from urllib.parse import urlparse, urlencode\n",
+ "from urllib.request import urlopen, Request\n",
+ "from urllib.error import HTTPError"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: easiest option\n",
+ "from future.standard_library import install_aliases\n",
+ "install_aliases()\n",
+ "\n",
+ "from urllib.parse import urlparse, urlencode\n",
+ "from urllib.request import urlopen, Request\n",
+ "from urllib.error import HTTPError"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: alternative 2\n",
+ "from future.standard_library import hooks\n",
+ "\n",
+ "with hooks():\n",
+ " from urllib.parse import urlparse, urlencode\n",
+ " from urllib.request import urlopen, Request\n",
+ " from urllib.error import HTTPError"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: alternative 3\n",
+ "from future.moves.urllib.parse import urlparse, urlencode\n",
+ "from future.moves.urllib.request import urlopen, Request\n",
+ "from future.moves.urllib.error import HTTPError\n",
+ "# or\n",
+ "from six.moves.urllib.parse import urlparse, urlencode\n",
+ "from six.moves.urllib.request import urlopen\n",
+ "from six.moves.urllib.error import HTTPError"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 and 3: alternative 4\n",
+ "try:\n",
+ " from urllib.parse import urlparse, urlencode\n",
+ " from urllib.request import urlopen, Request\n",
+ " from urllib.error import HTTPError\n",
+ "except ImportError:\n",
+ " from urlparse import urlparse\n",
+ " from urllib import urlencode\n",
+ " from urllib2 import urlopen, Request, HTTPError"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Tkinter"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "import Tkinter\n",
+ "import Dialog\n",
+ "import FileDialog\n",
+ "import ScrolledText\n",
+ "import SimpleDialog\n",
+ "import Tix \n",
+ "import Tkconstants\n",
+ "import Tkdnd \n",
+ "import tkColorChooser\n",
+ "import tkCommonDialog\n",
+ "import tkFileDialog\n",
+ "import tkFont\n",
+ "import tkMessageBox\n",
+ "import tkSimpleDialog\n",
+ "import ttk\n",
+ "\n",
+ "# Python 2 and 3 (after ``pip install future``):\n",
+ "import tkinter\n",
+ "import tkinter.dialog\n",
+ "import tkinter.filedialog\n",
+ "import tkinter.scrolledtext\n",
+ "import tkinter.simpledialog\n",
+ "import tkinter.tix\n",
+ "import tkinter.constants\n",
+ "import tkinter.dnd\n",
+ "import tkinter.colorchooser\n",
+ "import tkinter.commondialog\n",
+ "import tkinter.filedialog\n",
+ "import tkinter.font\n",
+ "import tkinter.messagebox\n",
+ "import tkinter.simpledialog\n",
+ "import tkinter.ttk"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### socketserver"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "import SocketServer\n",
+ "\n",
+ "# Python 2 and 3 (after ``pip install future``):\n",
+ "import socketserver"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### copy_reg, copyreg"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "import copy_reg\n",
+ "\n",
+ "# Python 2 and 3 (after ``pip install future``):\n",
+ "import copyreg"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### configparser"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "from ConfigParser import ConfigParser\n",
+ "\n",
+ "# Python 2 and 3 (after ``pip install future``):\n",
+ "from configparser import ConfigParser"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### queue"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "from Queue import Queue\n",
+ "\n",
+ "# Python 2 and 3 (after ``pip install future``):\n",
+ "from queue import Queue\n",
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### repr, reprlib"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "from repr import aRepr, repr\n",
+ "\n",
+ "# Python 2 and 3 (after ``pip install future``):\n",
+ "from reprlib import aRepr, repr"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### UserDict, UserList, UserString"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "from UserDict import UserDict\n",
+ "from UserList import UserList\n",
+ "from UserString import UserString\n",
+ "\n",
+ "# Python 3 only:\n",
+ "from collections import UserDict, UserList, UserString\n",
+ "\n",
+ "# Python 2 and 3: alternative 1\n",
+ "from future.moves.collections import UserDict, UserList, UserString\n",
+ "\n",
+ "# Python 2 and 3: alternative 2\n",
+ "from six.moves import UserDict, UserList, UserString\n",
+ "\n",
+ "# Python 2 and 3: alternative 3\n",
+ "from future.standard_library import install_aliases\n",
+ "install_aliases()\n",
+ "from collections import UserDict, UserList, UserString"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### itertools: filterfalse, zip_longest"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Python 2 only:\n",
+ "from itertools import ifilterfalse, izip_longest\n",
+ "\n",
+ "# Python 3 only:\n",
+ "from itertools import filterfalse, zip_longest\n",
+ "\n",
+ "# Python 2 and 3: alternative 1\n",
+ "from future.moves.itertools import filterfalse, zip_longest\n",
+ "\n",
+ "# Python 2 and 3: alternative 2\n",
+ "from six.moves import filterfalse, zip_longest\n",
+ "\n",
+ "# Python 2 and 3: alternative 3\n",
+ "from future.standard_library import install_aliases\n",
+ "install_aliases()\n",
+ "from itertools import filterfalse, zip_longest"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.4.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/docs/notebooks/bytes object.ipynb b/docs/notebooks/bytes object.ipynb
new file mode 100644
index 00000000..57921442
--- /dev/null
+++ b/docs/notebooks/bytes object.ipynb
@@ -0,0 +1,161 @@
+{
+ "metadata": {
+ "name": ""
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import sys\n",
+ "sys.version"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 6,
+ "text": [
+ "'2.7.6 (default, Mar 22 2014, 22:59:56) \\n[GCC 4.8.2]'"
+ ]
+ }
+ ],
+ "prompt_number": 6
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import future\n",
+ "future.__version__"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 1,
+ "text": [
+ "'0.12.0-dev'"
+ ]
+ }
+ ],
+ "prompt_number": 1
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "from builtins import bytes"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 2
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "# Backported Py3 bytes object\n",
+ "b = bytes(b'ABCD')"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 3
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "list(b)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 4,
+ "text": [
+ "[65, 66, 67, 68]"
+ ]
+ }
+ ],
+ "prompt_number": 4
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "repr(b)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 5,
+ "text": [
+ "\"b'ABCD'\""
+ ]
+ }
+ ],
+ "prompt_number": 5
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "# These raise TypeErrors:\n",
+ "# b + u'EFGH'\n",
+ "# bytes(b',').join([u'Fred', u'Bill'])\n",
+ "# b < u'abcd'"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 10
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "b == u'ABCD'"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 9,
+ "text": [
+ "False"
+ ]
+ }
+ ],
+ "prompt_number": 9
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [],
+ "language": "python",
+ "metadata": {},
+ "outputs": []
+ }
+ ],
+ "metadata": {}
+ }
+ ]
+}
diff --git a/docs/notebooks/object special methods (next, bool, ...).ipynb b/docs/notebooks/object special methods (next, bool, ...).ipynb
new file mode 100644
index 00000000..7da31856
--- /dev/null
+++ b/docs/notebooks/object special methods (next, bool, ...).ipynb
@@ -0,0 +1,246 @@
+{
+ "metadata": {
+ "name": ""
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+ {
+ "cells": [
+ {
+ "cell_type": "heading",
+ "level": 2,
+ "metadata": {},
+ "source": [
+ "``object`` special methods"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import sys\n",
+ "sys.version"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 1,
+ "text": [
+ "'2.7.6 (default, Mar 22 2014, 22:59:56) \\n[GCC 4.8.2]'"
+ ]
+ }
+ ],
+ "prompt_number": 1
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "from builtins import object"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 2
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "object??"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 2
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "# Py3-style iterators written as new-style classes (subclasses of\n",
+ "# future.builtins.object) are backward compatible with Py2:\n",
+ "class Upper(object):\n",
+ " def __init__(self, iterable):\n",
+ " self._iter = iter(iterable)\n",
+ " def __next__(self): # note the Py3 interface\n",
+ " return next(self._iter).upper()\n",
+ " def __iter__(self):\n",
+ " return self"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 3
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "assert list(Upper('hello')) == list('HELLO')"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 5
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "class AllOrNothing(object):\n",
+ " def __init__(self, l):\n",
+ " self.l = l\n",
+ " def __bool__(self):\n",
+ " return all(self.l)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 6
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "container = AllOrNothing([0, 100, 200])\n",
+ "bool(container)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 8,
+ "text": [
+ "False"
+ ]
+ }
+ ],
+ "prompt_number": 8
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "container2 = AllOrNothing([-100, 100, 200])\n",
+ "bool(container2)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 9,
+ "text": [
+ "True"
+ ]
+ }
+ ],
+ "prompt_number": 9
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Classes derived from Python builtins don't have this behaviour:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "class AllOrNothingBroken(list):\n",
+ " def __bool__(self):\n",
+ " print('Called!')\n",
+ " return all(self)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 13
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "container3 = AllOrNothingBroken([0, 1, 2])\n",
+ "bool(container3)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 14,
+ "text": [
+ "True"
+ ]
+ }
+ ],
+ "prompt_number": 14
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "But subclasses of ``future`` types do:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "from builtins import list\n",
+ "\n",
+ "class AllOrNothingFixed(list):\n",
+ " def __bool__(self):\n",
+ " print('Called!')\n",
+ " return all(self)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 15
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "container4 = AllOrNothingFixed([0, 1, 2])\n",
+ "bool(container4)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 17,
+ "text": [
+ "True"
+ ]
+ }
+ ],
+ "prompt_number": 17
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [],
+ "language": "python",
+ "metadata": {},
+ "outputs": []
+ }
+ ],
+ "metadata": {}
+ }
+ ]
+}
diff --git a/docs/older_interfaces.rst b/docs/older_interfaces.rst
new file mode 100644
index 00000000..546f92b9
--- /dev/null
+++ b/docs/older_interfaces.rst
@@ -0,0 +1,141 @@
+.. _older-standard-library-interfaces:
+
+Older interfaces
+~~~~~~~~~~~~~~~~
+
+In addition to the direct and ``install_aliases()`` interfaces (described in
+:ref:`standard-library-imports`), ``future`` supports four other interfaces to
+the reorganized standard library. This is largely for historical reasons (for
+versions prior to 0.14).
+
+
+``future.moves`` interface
+__________________________
+
+The ``future.moves`` interface avoids import hooks. It may therefore be more
+robust, at the cost of less idiomatic code. Use it as follows::
+
+ from future.moves import queue
+ from future.moves import socketserver
+ from future.moves.http.client import HTTPConnection
+ # etc.
+
+If you wish to achieve the effect of a two-level import such as this::
+
+ import http.client
+
+portably on both Python 2 and Python 3, note that Python currently does not
+support syntax like this::
+
+ from future.moves import http.client
+
+One workaround is to replace the dot with an underscore::
+
+ import future.moves.http.client as http_client
+
+
+Comparing future.moves and six.moves
+++++++++++++++++++++++++++++++++++++
+
+``future.moves`` and ``six.moves`` provide a similar Python 3-style
+interface to the native standard library module definitions.
+
+The major difference is that the ``future.moves`` package is a real Python package
+(``future/moves/__init__.py``) with real modules provided as ``.py`` files, whereas
+``six.moves`` constructs fake ``_LazyModule`` module objects within the Python
+code and injects them into the ``sys.modules`` cache.
+
+The advantage of ``six.moves`` is that the code fits in a single module that can be
+copied into a project that seeks to eliminate external dependencies.
+
+The advantage of ``future.moves`` is that it is likely to be more robust in the
+face of magic like Django's auto-reloader and tools like ``py2exe`` and
+``cx_freeze``. See issues #51, #53, #56, and #63 in the ``six`` project for
+more detail of bugs related to the ``six.moves`` approach.
+
+
+``import_`` and ``from_import`` functions
+_________________________________________
+
+The functional interface is to use the ``import_`` and ``from_import``
+functions from ``future.standard_library`` as follows::
+
+ from future.standard_library import import_, from_import
+
+ http = import_('http.client')
+ urllib = import_('urllib.request')
+
+ urlopen, urlsplit = from_import('urllib.request', 'urlopen', 'urlsplit')
+
+This interface also works with two-level imports.
+
+
+Context-manager for import hooks
+________________________________
+
+The context-manager interface is via a context-manager called ``hooks``::
+
+ from future.standard_library import hooks
+ with hooks():
+ import socketserver
+ import queue
+ import configparser
+ import test.support
+ import html.parser
+ from collections import UserList
+ from itertools import filterfalse, zip_longest
+        from http.client import HTTPConnection
+ import urllib.request
+ # and other moved modules and definitions
+
+This interface is straightforward and effective, using PEP 302 import
+hooks. However, there are reports that this sometimes leads to problems
+(see issue #238). Until this is resolved, it is probably safer to use direct
+imports or one of the other import mechanisms listed above.
+
+
+install_hooks() call (deprecated)
+_________________________________
+
+The last interface to the reorganized standard library is via a call to
+``install_hooks()``::
+
+ from future import standard_library
+ standard_library.install_hooks()
+
+ import urllib
+ f = urllib.request.urlopen('http://www.python.org/')
+
+ standard_library.remove_hooks()
+
+If you use this interface, it is recommended to disable the import hooks again
+after use by calling ``remove_hooks()``, in order to prevent the futurized
+modules from being invoked inadvertently by other modules. (Python does not
+automatically disable import hooks at the end of a module, but keeps them
+active for the life of a process unless removed.)
+
+.. The call to ``scrub_future_sys_modules()`` removes any modules from the
+.. ``sys.modules`` cache (on Py2 only) that have Py3-style names, like ``http.client``.
+.. This can prevent libraries that have their own Py2/3 compatibility code from
+.. importing the ``future.moves`` or ``future.backports`` modules unintentionally.
+.. Code such as this will then fall through to using the Py2 standard library
+.. modules on Py2::
+..
+.. try:
+.. from http.client import HTTPConnection
+.. except ImportError:
+.. from httplib import HTTPConnection
+..
+.. **Requests**: The above snippet is from the `requests
+.. <https://github.com/kennethreitz/requests>`_ library. As of v0.12, the
+.. ``future.standard_library`` import hooks are compatible with Requests.
+
+
+.. If you wish to avoid changing every reference of ``http.client`` to
+.. ``http_client`` in your code, an alternative is this::
+..
+.. from future.standard_library import http
+.. from future.standard_library.http import client as _client
+..     http.client = _client
+
+.. but it has the advantage that it can be used by automatic translation scripts such as ``futurize`` and ``pasteurize``.
diff --git a/docs/open_function.rst b/docs/open_function.rst
index a83c2d8f..7915d8a8 100644
--- a/docs/open_function.rst
+++ b/docs/open_function.rst
@@ -5,26 +5,26 @@ open()
The Python 3 builtin :func:`open` function for opening files returns file
contents as (unicode) strings unless the binary (``b``) flag is passed, as in::
-
+
open(filename, 'rb')
in which case its methods like :func:`read` return Py3 :class:`bytes` objects.
-``future.builtins`` provides an ``open`` function on Py2 that is mostly
-compatible with that on Python 3 (e.g. it offers keyword arguments like
-``encoding``). This maps to the ``open`` backport available in the standard
-library :mod:`io` module on Py2.6 and Py2.7.
+On Py2 with ``future`` installed, the :mod:`builtins` module provides an
+``open`` function that is mostly compatible with that on Python 3 (e.g. it
+offers keyword arguments like ``encoding``). This maps to the ``open`` backport
+available in the standard library :mod:`io` module on Py2.7.
One difference to be aware of between the Python 3 ``open`` and
``future.builtins.open`` on Python 2 is that the return types of methods such
as :func:`read()` from the file object that ``open`` returns are not
automatically cast from native bytes or unicode strings on Python 2 to the
-appropriate ``future.builtins.bytes`` or ``future.builtins.str`` types. If you
+corresponding ``future.builtins.bytes`` or ``future.builtins.str`` types. If you
need the returned data to behave the exactly same way on Py2 as on Py3, you can
cast it explicitly as follows::
from __future__ import unicode_literals
- from future.builtins import *
+ from builtins import open, bytes
data = open('image.png', 'rb').read()
# On Py2, data is a standard 8-bit str with loose Unicode coercion.
@@ -37,4 +37,3 @@ cast it explicitly as follows::
assert data[4] == 13 # integer
# Raises TypeError:
# data + u''
-
diff --git a/docs/other/auto2to3.py b/docs/other/auto2to3.py
new file mode 100644
index 00000000..1f56aa14
--- /dev/null
+++ b/docs/other/auto2to3.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+"""Wrapper to run 2to3 automatically at import time.
+
+Usage:
+ auto2to3 -m mypackage.main_module
+ auto2to3 mypackage/script.py
+
+By default, all modules imported from a subdirectory of the current
+directory will be run through `2to3`. To change this behavior, use the
+`--package` or `--dir` flags to `auto2to3` to specify which packages or
+directories contain Python 2 code that should be converted.
+
+2to3 output is cached on disk between runs for speed.
+
+Based on auto2to3.py by Georg Brandl:
+http://dev.pocoo.org/hg/sandbox/file/tip/auto2to3.py
+"""
+
+import argparse
+import os
+import sys
+# imp was deprecated in python 3.6
+if sys.version_info >= (3, 6):
+ import importlib as imp
+else:
+ import imp
+import runpy
+from io import StringIO
+from pkgutil import ImpImporter, ImpLoader
+import runpy
+import sys
+import tempfile
+
+import lib2to3
+from lib2to3.refactor import RefactoringTool, get_fixers_from_package
+
+fixes = get_fixers_from_package('lib2to3.fixes')
+rt = RefactoringTool(fixes)
+
+PACKAGES = []
+DIRS = []
+
+def maybe_2to3(filename, modname=None):
+ """Returns a python3 version of filename."""
+ need_2to3 = False
+ filename = os.path.abspath(filename)
+ if any(filename.startswith(d) for d in DIRS):
+ need_2to3 = True
+ elif modname is not None and any(modname.startswith(p) for p in PACKAGES):
+ need_2to3 = True
+ if not need_2to3:
+ return filename
+ outfilename = '/_auto2to3_'.join(os.path.split(filename))
+ if (not os.path.exists(outfilename) or
+ os.stat(filename).st_mtime > os.stat(outfilename).st_mtime):
+ try:
+ with open(filename) as file:
+ contents = file.read()
+ contents = rt.refactor_docstring(contents, filename)
+ tree = rt.refactor_string(contents, filename)
+ except Exception as err:
+ raise ImportError("2to3 couldn't convert %r" % filename)
+ outfile = open(outfilename, 'wb')
+ outfile.write(str(tree).encode('utf8'))
+ outfile.close()
+ return outfilename
+
+
+
+class ToThreeImporter(ImpImporter):
+ def find_module(self, fullname, path=None):
+ # this duplicates most of ImpImporter.find_module
+ subname = fullname.split(".")[-1]
+ if subname != fullname and self.path is None:
+ return None
+ if self.path is None:
+ path = None
+ else:
+ path = [os.path.realpath(self.path)]
+ try:
+ file, filename, etc = imp.find_module(subname, path)
+ except ImportError:
+ return None
+ if file and etc[2] == imp.PY_SOURCE:
+ outfilename = maybe_2to3(filename, modname=fullname)
+ if outfilename != filename:
+ file.close()
+ filename = outfilename
+ file = open(filename, 'rb')
+ return ImpLoader(fullname, file, filename, etc)
+
+
+# setup the hook
+sys.path_hooks.append(ToThreeImporter)
+for key in sys.path_importer_cache:
+ if sys.path_importer_cache[key] is None:
+ sys.path_importer_cache[key] = ToThreeImporter(key)
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--package', action='append')
+ parser.add_argument('--dir', action='append')
+ parser.add_argument('-m', action='store', metavar='MODULE')
+ args, rest = parser.parse_known_args()
+ if args.package:
+ PACKAGES.extend(args.package)
+ if args.dir:
+ DIRS.extend(os.path.abspath(d) for d in args.dir)
+ if not PACKAGES and not DIRS:
+ DIRS.append(os.getcwd())
+ if args.m:
+ sys.argv[1:] = rest
+ runpy.run_module(args.m, run_name='__main__', alter_sys=True)
+ elif rest:
+ sys.argv = rest
+ converted = maybe_2to3(rest[0])
+ with open(converted) as f:
+ new_globals = dict(__name__='__main__',
+ __file__=rest[0])
+ exec(f.read(), new_globals)
+ else:
+ import code
+ code.interact()
+
+if __name__ == '__main__':
+ main()
diff --git a/docs/other/find_pattern.py b/docs/other/find_pattern.py
index 679a1d64..1a5da35e 100644
--- a/docs/other/find_pattern.py
+++ b/docs/other/find_pattern.py
@@ -38,6 +38,7 @@
Larger snippets can be placed in a file (as opposed to a command-line
arg) and processed with the -f option.
"""
+from __future__ import print_function
__author__ = "Collin Winter "
@@ -65,7 +66,7 @@ def main(args):
elif len(args) > 1:
tree = driver.parse_stream(StringIO(args[1] + "\n"))
else:
- print >>sys.stderr, "You must specify an input file or an input string"
+ print("You must specify an input file or an input string", file=sys.stderr)
return 1
examine_tree(tree)
@@ -75,10 +76,10 @@ def examine_tree(tree):
for node in tree.post_order():
if isinstance(node, pytree.Leaf):
continue
- print repr(str(node))
+ print(repr(str(node)))
verdict = raw_input()
if verdict.strip():
- print find_pattern(node)
+ print(find_pattern(node))
return
def find_pattern(node):
diff --git a/docs/other/fix_notebook_html_colour.py b/docs/other/fix_notebook_html_colour.py
new file mode 100755
index 00000000..36c2205f
--- /dev/null
+++ b/docs/other/fix_notebook_html_colour.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+A script to re-enable colour in .html files produced from IPython notebooks.
+
+Based on a script in a GitHub gist with this copyright notice:
+
+#----------------------------------------------------------------------------
+# Copyright (c) 2013 - Damián Avila
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# A little snippet to fix @media print issue printing slides from IPython
+#-----------------------------------------------------------------------------
+"""
+
+import io
+import sys
+
+notebook = sys.argv[1]
+assert notebook.endswith('.html')
+# notebook = 'jevans.ipynb'
+path = notebook[:-5] + '.html'
+flag = u'@media print{*{text-shadow:none !important;color:#000 !important'
+
+with io.open(path, 'r') as in_file:
+ data = in_file.readlines()
+ for i, line in enumerate(data):
+ if line[:64] == flag:
+ data[i] = data[i].replace('color:#000 !important;', '')
+
+with io.open(path, 'w') as out_file:
+ out_file.writelines(data)
+
+print("You can now print your slides")
diff --git a/docs/other/lessons.txt b/docs/other/lessons.txt
index 5794f496..ede523cb 100644
--- a/docs/other/lessons.txt
+++ b/docs/other/lessons.txt
@@ -30,7 +30,7 @@ Python 2:
Python 3:
>>> array.array(b'b')
TypeError: must be a unicode character, not bytes
-
+
>>> array.array(u'b')
array('b')
@@ -47,5 +47,3 @@ Running test_bytes.py from Py3 on Py2 (after fixing imports) gives this:
Ran 203 tests in 0.209s
FAILED (failures=31, errors=55, skipped=1)
-
-
diff --git a/docs/other/upload_future_docs.sh b/docs/other/upload_future_docs.sh
index 09672b7b..04470f3f 100644
--- a/docs/other/upload_future_docs.sh
+++ b/docs/other/upload_future_docs.sh
@@ -1,21 +1,23 @@
-On the local machine
---------------------
+# On the local machine
-git checkout v0.7.0
+git checkout v0.16.0 # or whatever
rm -Rf docs/build/
cd docs; make html
+cp cheatsheet.pdf ~/shared/
cd build
-touch ../../python-future-html-docs.zip
-rm ../../python-future-html-docs.zip
-zip -r ../../python-future-html-docs.zip *
-scp ../../python-future-html-docs.zip python-future.org:
-ssh python-future.org
+touch ~/shared/python-future-html-docs.zip
+rm ~/shared/python-future-html-docs.zip
+zip -r ~/shared/python-future-html-docs.zip *
+scp ~/shared/python-future-html-docs.zip ubuntu@python-future.org:
+scp ~/shared/cheatsheet.pdf ubuntu@python-future.org:
+ssh ubuntu@python-future.org
-On the remote machine:
-----------------------
-cd /var/www/python-future/html
-unzip ~/python-future-html-docs.zip
-chmod a+r * _static/*
+# On the remote machine:
+cd /var/www/python-future.org/
+unzip -o ~/python-future-html-docs.zip
+chmod a+r * html/* html/_static/*
+cp ~/cheatsheet.pdf ./html/compatible_idioms.pdf
+cp ~/cheatsheet.pdf ./html/cheatsheet.pdf
diff --git a/docs/other/useful_links.txt b/docs/other/useful_links.txt
index 8dec2f9b..abb96849 100644
--- a/docs/other/useful_links.txt
+++ b/docs/other/useful_links.txt
@@ -23,7 +23,7 @@ http://lucumr.pocoo.org/2011/12/7/thoughts-on-python3/
http://python3porting.com/fixers.html
http://washort.twistedmatrix.com/2010/11/unicode-in-python-and-how-to-prevent-it.html
http://docs.python.org/release/3.0.1/whatsnew/3.0.html
-https://pypi.python.org/pypi/unicode-nazi
+https://pypi.org/project/unicode-nazi/
http://www.rmi.net/~lutz/strings30.html
"Porting your code to Python 3": Alexandre Vassalotti: peadrop.com/slides/mp5.pdf
@@ -43,7 +43,7 @@ python-modernize: https://github.com/mitsuhiko/python-modernize
2to3 docs describing the different fixers: http://docs.python.org/2/library/2to3.html
-Injecting code into running Python processes (hopefully not needed): https://pypi.python.org/pypi/pyrasite/2.0
+Injecting code into running Python processes (hopefully not needed): https://pypi.org/project/pyrasite/2.0/
Withdrawn PEP to help with the Py3k standard library transition: http://www.peps.io/364/
@@ -52,7 +52,7 @@ Import hooks
http://www.peps.io/302/
"Hacking Python imports ... for fun and profit": blog post from 2012-05: http://xion.org.pl/2012/05/06/hacking-python-imports/
-Full importlib backport to Py2: https://pypi.python.org/pypi/backport_importlib/0...1
+Full importlib backport to Py2: https://pypi.org/project/backport_importlib/0...1/
Python 2.7 importlib subset: http://docs.python.org/2/whatsnew/2.7.html#importlib-section
@@ -78,7 +78,7 @@ PEPs: 358, 3112, 3137, 3138
http://python3porting.com/noconv.html#unicode-section
Unicode literals u'...' back in Python 3.3: http://www.python.org/dev/peps/pep-0414/
https://github.com/django/django/blob/master/django/utils/encoding.py
-https://pypi.python.org/pypi/unicode-nazi
+https://pypi.org/project/unicode-nazi/
http://docs.python.org/3/library/stdtypes.html#bytes-methods
http://wolfprojects.altervista.org/talks/unicode-and-python-3/
Buffer protocol (which bytes and bytes-like objects obey): http://docs.python.org/3.3/c-api/buffer.html#bufferobjects
@@ -86,7 +86,7 @@ Buffer protocol (which bytes and bytes-like objects obey): http://docs.python.or
Python's future
----------------
-https://ncoghlan_devs-python-notes.readthedocs.org/en/latest/python3/questions_and_answers.html
+https://ncoghlan-devs-python-notes.readthedocs.io/en/latest/python3/questions_and_answers.html
http://www.ironfroggy.com/software/i-am-worried-about-the-future-of-python
@@ -104,8 +104,7 @@ Also: typecheck module on PyPI
To categorize
-------------
-https://pypi.python.org/pypi/awkwardduet/1.1a4
+https://pypi.org/project/awkwardduet/1.1a4/
https://github.com/campadrenalin/persei/blob/master/persei.py
http://slideshare.net/dabeaz/mastering-python-3-io
http://rmi.net/~lutz/strings30.html
-
diff --git a/docs/overview.rst b/docs/overview.rst
index 692ac483..72a33558 100644
--- a/docs/overview.rst
+++ b/docs/overview.rst
@@ -1,105 +1 @@
-.. _overview:
-
-Overview
-========
-
-``future`` is the missing compatibility layer between Python 3 and Python
-2. It allows you to maintain a single, clean Python 3.x-compatible
-codebase with minimal cruft and run it easily on Python 2 mostly unchanged.
-
-``future`` comes with ``futurize``, a script that helps you to transition
-to supporting both Python 2 and 3 in a single codebase, module by module.
-
-.. _features:
-
-Features
---------
-
-- provides backports and remappings for 16 builtins with different
- semantics on Py3 versus Py2
-- provides backports and remappings from the Py3 standard library
-- 300+ unit tests
-- ``futurize`` script based on ``2to3``, ``3to2`` and parts of
- ``python-modernize`` for automatic conversion from either Py2 or Py3 to a
- clean single-source codebase compatible with Python 2.6+ and Python 3.3+.
-- a consistent set of utility functions and decorators selected from
- Py2/3 compatibility interfaces from projects like ``six``, ``IPython``,
- ``Jinja2``, ``Django``, and ``Pandas``.
-
-
-.. _code-examples:
-
-Code examples
--------------
-
-``future`` is designed to be imported at the top of each Python module
-together with Python's built-in ``__future__`` module. For example, this
-code behaves the same way on Python 2.6/2.7 after these imports as it does
-on Python 3::
-
- from __future__ import absolute_import, division, print_function
- from future import bytes, str, open, super, zip, round, input, int
-
- # Backported Py3 bytes object
- b = bytes(b'ABCD')
- assert list(b) == [65, 66, 67, 68]
- assert repr(b) == "b'ABCD'"
- # These raise TypeErrors:
- # b + u'EFGH'
- # bytes(b',').join([u'Fred', u'Bill'])
-
- # Backported Py3 str object
- s = str(u'ABCD')
- assert s != bytes(b'ABCD')
- assert isinstance(s.encode('utf-8'), bytes)
- assert isinstance(b.decode('utf-8'), str)
- assert repr(s) == 'ABCD' # consistent repr with Py3 (no u prefix)
- # These raise TypeErrors:
- # bytes(b'B') in s
- # s.find(bytes(b'A'))
-
- # Extra arguments for the open() function
- f = open('japanese.txt', encoding='utf-8', errors='replace')
-
- # New simpler super() function:
- class VerboseList(list):
- def append(self, item):
- print('Adding an item')
- super().append(item)
-
- # New iterable range object with slicing support
- for i in range(10**15)[:10]:
- pass
-
- # Other iterators: map, zip, filter
- my_iter = zip(range(3), ['a', 'b', 'c'])
- assert my_iter != list(my_iter)
-
- # The round() function behaves as it does in Python 3, using
- # "Banker's Rounding" to the nearest even last digit:
- assert round(0.1250, 2) == 0.12
-
- # input() replaces Py2's raw_input() (with no eval()):
- name = input('What is your name? ')
- print('Hello ' + name)
-
- # Compatible output from isinstance() across Py2/3:
- assert isinstance(2**64, int) # long integers
- assert isinstance(u'blah', str)
- assert isinstance('blah', str) # if unicode_literals is in effect
-
-There is also support for renamed standard library modules in the form of a context manager that provides import hooks::
-
- from future import standard_library
-
- with standard_library.enable_hooks():
- from http.client import HttpConnection
- from itertools import filterfalse
- import html.parser
- import queue
-
-
-Next steps
-----------
-Check out the :ref:`quickstart-guide`.
-
+.. include:: ../README.rst
diff --git a/docs/pasteurize.rst b/docs/pasteurize.rst
new file mode 100644
index 00000000..070b5d1a
--- /dev/null
+++ b/docs/pasteurize.rst
@@ -0,0 +1,45 @@
+.. _backwards-conversion:
+
+``pasteurize``: Py3 to Py2/3
+----------------------------
+
+Running ``pasteurize -w mypy3module.py`` turns this Python 3 code::
+
+ import configparser
+ import copyreg
+
+ class Blah:
+ pass
+ print('Hello', end=None)
+
+into this code which runs on both Py2 and Py3::
+
+ from __future__ import print_function
+ from future import standard_library
+ standard_library.install_hooks()
+
+ import configparser
+ import copyreg
+
+ class Blah(object):
+ pass
+ print('Hello', end=None)
+
+Notice that both ``futurize`` and ``pasteurize`` create explicit new-style
+classes that inherit from ``object`` on both Python versions, and both
+refer to stdlib modules (as well as builtins) under their Py3 names.
+
+Note also that the ``configparser`` module is a special case; there is a full
+backport available on PyPI (https://pypi.org/project/configparser/), so, as
+of v0.16.0, ``python-future`` no longer provides a ``configparser`` package
+alias. To use the resulting code on Py2, install the ``configparser`` backport
+with ``pip install configparser`` or by adding it to your ``requirements.txt``
+file.
+
+``pasteurize`` also handles the following Python 3 features:
+
+- keyword-only arguments
+- metaclasses (using :func:`~future.utils.with_metaclass`)
+- extended tuple unpacking (PEP 3132)
+
+To handle function annotations (PEP 3107), see :ref:`func_annotations`.
diff --git a/docs/porting.rst b/docs/porting.rst
deleted file mode 100644
index 96bbab27..00000000
--- a/docs/porting.rst
+++ /dev/null
@@ -1,112 +0,0 @@
-.. _porting:
-
-Python 3 porting cheat-sheet
-============================
-
-Instructions and notes on porting code from Python 2 to both Python 3 and 2 using ``future``:
-
-.. _porting-setup:
-
-Step 0: setup
--------------
-
-Step 0 goal: set up and see the tests passing on Python 2 and failing on Python 3.
-
-a. Clone the package from github/bitbucket. Rename your repo to ``package-future``. Examples: ``reportlab-future``, ``paramiko-future``, ``mezzanine-future``.
-b. Create and activate a Python 2 virtualenv. Install the package with ``python setup.py install`` and run its test suite on Py2.7 or Py2.6 (e.g. ``python setup.py test`` or ``py.test`` or ``nosetests``)
-c. Optionally: if there’s a ``.travis.yml`` file, add Python version 3.3 and remove any versions < 2.6.
-d. Install Python 3.3 with e.g. ``sudo apt-get install python3``. On other platforms, an easy way is to use Miniconda3. See `Miniconda3 `_. Then e.g.::
-
- conda create -n py33 python=3.3
-
-.. _porting-step1:
-
-Step 1: modern Py2 code
------------------------
-
-The goal for this step is to modernize the Python 2 code without introducing any dependencies (on ``future`` or e.g. ``six``) at this stage.
-
- 1a. Install ``future`` into the virtualenv using::
-
- pip install future
-
- 1b. Run ``futurize --stage1 -w *.py subdir1/*.py subdir2/*.py``
-
- 1c. Commit all changes
-
- 1d. Re-run the test suite and fix any errors.
-
-See :ref:`forwards-conversion-stage1` for more info.
-
-
-Example error
-~~~~~~~~~~~~~
-
-One relatively common error after conversion is::
-
- Traceback (most recent call last):
- ...
- File "/home/user/Install/BleedingEdge/reportlab/tests/test_encrypt.py", line 19, in
- from .test_pdfencryption import parsedoc
- ValueError: Attempted relative import in non-package
-
-If you get this error, try adding an empty ``__init__.py`` file in the package
-directory. (In this example, in the tests/ directory.) If this doesn’t help,
-and if this message appears for all tests, they must be invoked differently
-(from the cmd line or e.g. ``setup.py``). The way to run a module inside a
-package on Python 3, or on Python 2 with ``absolute_import`` in effect, is::
-
- python -m tests.test_platypus_xref
-
-(For more info, see `PEP 328 `_ and the `PEP 8 `_ section on absolute imports.)
-
-
-.. _porting-step2:
-
-Step 2: working Py3 code that still supports Py2
-------------------------------------------------
-
-The goal for this step is to get the tests passing first on Py3 and then on Py2
-again with the help of the ``future`` package.
-
-2a. Run::
-
- futurize —-stage2 myfolder/*.py
-
-This adds this further import to each module::
-
- from __future__ import unicode_literals
-
-All strings are then unicode (on Py2 as on Py3) unless explicitly marked with a ``b''`` prefix.
-
-It also makes other conversions needed to support both Python 2 and 3. These will likely
-require additional imports from ``future``, such as::
-
- from future import standard_library
- from future.builtins import bytes
- from future.builtins import open
-
-If you would like ``futurize`` to import all the changed builtins to have their Python 3 semantics on Python 2, invoke it like this::
-
- futurize --stage2 --all-imports myfolder/*.py
-
-
-2b. Re-run your tests on Py3 now. Make changes until your tests pass on Python 3.
-
-2c. Commit your changes! :)
-
-2d. Now run your tests on Python 2 and notice the errors. Add wrappers from ``future`` to re-enable Python 2 compatibility:
-
- - :func:`utils.reraise()` function for raising exceptions compatibly
- - ``bytes(b'blah')`` instead of ``b'blah'``
- - ``str('my string')`` instead of ``'my string'`` if you need to enforce Py3’s strict type-checking on Py2
- - ``int(1234)`` instead of ``1234`` if you want to enforce a Py3-like long integer
- - :func:`@utils.implements_iterator` decorator for any custom iterator class with a ``.__next__()`` method (which used to be ``.next()``)
- - :func:`@utils.python_2_unicode_compatible` decorator for any class with a ``__str__`` method (which used to be ``__unicode__``).
- - :func:`utils.with_metaclass` to define any metaclasses.
-
-See :ref:`what-else` for more info.
-
-After each change, re-run the tests on Py3 and Py2 to ensure they pass on both.
-
-2e. You’re done! Celebrate! Push your code and announce to the world! Hashtag #python-future
diff --git a/docs/quickstart.rst b/docs/quickstart.rst
index d9d6d847..8461a1a2 100644
--- a/docs/quickstart.rst
+++ b/docs/quickstart.rst
@@ -16,7 +16,8 @@ To install the latest stable version, type::
pip install future
-If you would prefer the latest development version, it is available `here `_.
+If you would prefer the latest development version, it is available `here
+<https://github.com/PythonCharmers/python-future>`_.
If you are writing code from scratch
@@ -26,32 +27,39 @@ The easiest way is to start each new module with these lines::
from __future__ import (absolute_import, division,
print_function, unicode_literals)
- from future.builtins import *
+ from builtins import *
Then write standard Python 3 code. The :mod:`future` package will
-provide support for running your code on Python 2.6 and 2.7 mostly unchanged.
+provide support for running your code on Python 2.7, and 3.4+ mostly
+unchanged.
-See :ref:`what-else` for more details.
+- For explicit import forms, see :ref:`explicit-imports`.
+- For more details, see :ref:`what-else`.
+- For a cheat sheet, see :ref:`compatible-idioms`.
To convert existing Python 3 code
---------------------------------
-To offer backward compatibility with Python 2, you can use the ``futurize``
-script with the ``--from3`` parameter. This adds these lines at the top of each
+To offer backward compatibility with Python 2 from your Python 3 code,
+you can use the ``pasteurize`` script. This adds these lines at the top of each
module::
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
- from future.builtins import open
- from future.builtins import str
+
+ from builtins import open
+ from builtins import str
# etc., as needed
-
-and converts a few Python 3-only constructs to a form compatible with
-both Py3 and Py2. Most remaining Python 3 code should simply work on
-Python 2.
+
+ from future import standard_library
+ standard_library.install_aliases()
+
+and converts several Python 3-only constructs (like keyword-only arguments) to a
+form compatible with both Py3 and Py2. Most remaining Python 3 code should
+simply work on Python 2.
See :ref:`backwards-conversion` for more details.
@@ -59,7 +67,9 @@ See :ref:`backwards-conversion` for more details.
To convert existing Python 2 code
---------------------------------
-Start with the :ref:`automatic-conversion` page.
+.. include:: futurize_overview.rst
+
+See :ref:`forwards-conversion-stage1` and :ref:`forwards-conversion-stage2` for more details.
.. If you already know Python 3, start with the :ref:`automatic-conversion` page.
.. If you don't know Python 3 yet, start with :ref:`python3-essentials`.
@@ -70,103 +80,72 @@ Start with the :ref:`automatic-conversion` page.
Standard library reorganization
-------------------------------
-:mod:`future` supports the standard library reorganization (PEP 3108)
-via import hooks, allowing almost all moved standard library modules to
-be accessed under their Python 3 names and locations in Python 2::
-
+:mod:`future` supports the standard library reorganization (PEP 3108) via
+one of several mechanisms, allowing most moved standard library modules
+to be accessed under their Python 3 names and locations in Python 2::
+
from future import standard_library
-
- with standard_library.enable_hooks():
- import socketserver
- import queue
- import configparser
- import test.support
- import html.parser
- from collections import UserList
- from itertools import filterfalse, zip_longest
- from http.client import HttpConnection
- # and other moved modules and definitions
-
-:mod:`future` also includes backports for these stdlib modules from Py3
-that were heavily refactored versus Py2::
-
- with standard_library.enable_hooks():
- import html
- import html.entities
- import html.parser
-
- import http
- import http.client
- import http.server
-
-These modules are currently not supported, but we aim to support them in
-the future::
-
- with standard_library.enable_hooks():
- import http.cookies
- import http.cookiejar
-
- import urllib
- import urllib.parse
- import urllib.request
- import urllib.error
-
-If you need one of these, please open an issue `here
-`_.
+ standard_library.install_aliases()
-For more information on interfaces that have changed in the standard library
-between Python 2 and Python 3, see :ref:`stdlib-incompatibilities`.
+ # Then these Py3-style imports work on both Python 2 and Python 3:
+ import socketserver
+ import queue
+ from collections import UserDict, UserList, UserString
+ from collections import ChainMap # even on Py2.7
+ from itertools import filterfalse, zip_longest
+ import html
+ import html.entities
+ import html.parser
-.. _utilities-guide:
+ import http
+ import http.client
+ import http.server
+ import http.cookies
+ import http.cookiejar
-Utilities
----------
+ import urllib.request
+ import urllib.parse
+ import urllib.response
+ import urllib.error
+ import urllib.robotparser
-:mod:`future` also provides some useful functions and decorators to ease
-backward compatibility with Py2 in the :mod:`future.utils` module. These
-are a selection of the most useful functions from ``six`` and various
-home-grown Py2/3 compatibility modules from popular Python projects, such as
-Jinja2, Pandas, IPython, and Django. The goal is to consolidate these in one
-place, tested and documented, obviating the need for every project to repeat
-this work.
+ import xmlrpc.client
+ import xmlrpc.server
-Examples::
+and others. For a complete list, see :ref:`direct-imports`.
- # Functions like print() expect __str__ on Py2 to return a byte
- # string. This decorator maps the __str__ to __unicode__ on Py2 and
- # defines __str__ to encode it as utf-8:
+.. _py2-dependencies:
- from future.utils import python_2_unicode_compatible
+Python 2-only dependencies
+--------------------------
- @python_2_unicode_compatible
- class MyClass(object):
- def __str__(self):
- return u'Unicode string: \u5b54\u5b50'
- a = MyClass()
+If you have dependencies that support only Python 2, you may be able to use the
+``past`` module to automatically translate these Python 2 modules to Python 3
+upon import. First, install the Python 2-only package into your Python 3
+environment::
- # This then prints the Chinese characters for Confucius:
- print(a)
+ $ pip3 install mypackagename --no-compile # to ignore SyntaxErrors
+(or use ``pip`` if this points to your Py3 environment.)
- # Iterators on Py3 require a __next__() method, whereas on Py2 this
- # is called next(). This decorator allows Py3-style iterators to work
- # identically on Py2:
+Then add the following code at the top of your (Py3 or Py2/3-compatible)
+code::
- @implements_iterator
- class Upper(object):
- def __init__(self, iterable):
- self._iter = iter(iterable)
- def __next__(self): # note the Py3 interface
- return next(self._iter).upper()
- def __iter__(self):
- return self
+ from past.translation import autotranslate
+ autotranslate(['mypackagename'])
+ import mypackagename
- print(list(Upper('hello')))
- # prints ['H', 'E', 'L', 'L', 'O']
+This feature is experimental, and we would appreciate your feedback on
+how well this works or doesn't work for you. Please file an issue `here
+<https://github.com/PythonCharmers/python-future/issues>`_.
-On Python 3 these decorators are no-ops.
+For more information on the automatic translation feature, see :ref:`translation`.
-For more information, see :ref:`what-else`.
+Next steps
+----------
+For more information about writing Py2/3-compatible code, see:
+- :ref:`compatible-idioms`
+- :ref:`what-else`.
diff --git a/docs/reference.rst b/docs/reference.rst
index ca41a900..d9ac5e12 100644
--- a/docs/reference.rst
+++ b/docs/reference.rst
@@ -1,10 +1,10 @@
-###############
-Reference Guide
-###############
+API Reference (in progress)
+***************************
-.. *NOTE: These are still a work in progress... We need to go through our
-.. docstrings and make them sphinx-compliant, and figure out how to improve
-.. formatting with the sphinx-bootstrap-theme plugin.*
+**NOTE: This page is still a work in progress... We need to go through our
+docstrings and make them sphinx-compliant, and figure out how to improve
+formatting with the sphinx-bootstrap-theme plugin. Pull requests would be
+very welcome.**
.. contents::
@@ -12,11 +12,21 @@ Reference Guide
:depth: 2
future.builtins Interface
-============================
+=========================
.. automodule:: future.builtins
:members:
+.. Docs are also in future-builtins.rst. Extract these and put them into the
+.. relevant docstrings.
+
+
+Backported types from Python 3
+==============================
+
+.. automodule:: future.types
+ :members:
+
future.standard_library Interface
=================================
@@ -32,11 +42,40 @@ future.utils Interface
:members:
-Backported types
-================
+past.builtins Interface
+=========================
+
+.. automodule:: past.builtins
+ :members:
+
+.. Docs are also in future-builtins.rst. Extract these and put them into the
+.. relevant docstrings.
+
+
+Forward-ported types from Python 2
+==================================
+
+.. automodule:: past.types
+ :members:
-.. autoclass:: future.builtins.backports.newbytes
-.. autoclass:: future.builtins.backports.newstr
-.. autoclass:: future.builtins.backports.newint
+.. bytes
+.. -----
+.. .. automodule:: future.types.newbytes
+..
+.. dict
+.. -----
+.. .. automodule:: future.types.newdict
+..
+.. int
+.. ---
+.. .. automodule:: future.builtins.backports.newint
+..
+.. range
+.. -----
+.. .. automodule:: future.types.newrange
+..
+.. str
+.. ---
+.. .. automodule:: future.types.newstr
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 00000000..265642f4
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,3 @@
+sphinx==3.2.1
+Pallets-Sphinx-Themes==2.2.1
+setuptools==70.0.0
diff --git a/docs/roadmap.rst b/docs/roadmap.rst
index a759796f..c5020d5e 100644
--- a/docs/roadmap.rst
+++ b/docs/roadmap.rst
@@ -5,14 +5,17 @@ futurize script
---------------
1. "Safe" mode -- from Py2 to modern Py2 or Py3 to more-compatible Py3
+
- Split the fixers into two categories: safe and bold
- Safe is highly unlikely to break existing Py2 or Py3 support. The
output of this still requires :mod:`future` imports. Examples:
- - Compatible metaclass syntax on Py3
- - Explicit inheritance from object on Py3
-
+
+ - Compatible metaclass syntax on Py3
+ - Explicit inheritance from object on Py3
+
- Bold might make assumptions about which strings on Py2 should be
unicode strings and which should be bytestrings.
+
- We should also build up a database of which standard library
interfaces on Py2 and Py3 accept unicode strings versus
byte-strings, which have changed, and which haven't.
@@ -34,11 +37,10 @@ Experimental:
from future import new_metaclass_syntax
from future import new_style_classes
-- Maybe::
+- [Done] Maybe::
from future.builtins import str
should import a custom str is a Py3 str-like object which inherits from unicode and
removes the decode() method and has any other Py3-like behaviours
(possibly stricter casting?)
-
diff --git a/docs/standard_library_imports.rst b/docs/standard_library_imports.rst
new file mode 100644
index 00000000..c09e9e30
--- /dev/null
+++ b/docs/standard_library_imports.rst
@@ -0,0 +1,181 @@
+.. _standard-library-imports:
+
+Standard library imports
+------------------------
+
+:mod:`future` supports the standard library reorganization (PEP 3108) through
+several mechanisms.
+
+.. _direct-imports:
+
+Direct imports
+~~~~~~~~~~~~~~
+
+As of version 0.14, the ``future`` package comes with top-level packages for
+Python 2.x that provide access to the reorganized standard library modules
+under their Python 3.x names.
+
+Direct imports are the preferred mechanism for accessing the renamed standard
+library modules in Python 2/3 compatible code. For example, the following clean
+Python 3 code runs unchanged on Python 2 after installing ``future``::
+
+ >>> # Alias for future.builtins on Py2:
+ >>> from builtins import str, open, range, dict
+
+ >>> # Top-level packages with Py3 names provided on Py2:
+ >>> import queue
+ >>> import tkinter.dialog
+ >>> etc.
+
+Notice that this code actually runs on Python 3 without the presence of the
+``future`` package.
+
+Of the 44 modules that were refactored with PEP 3108 (standard library
+reorganization), 29 are supported with direct imports in the above manner. The
+complete list is here::
+
+ ### Renamed modules:
+
+ import builtins
+
+ import copyreg
+
+ import html
+ import html.entities
+ import html.parser
+
+ import http.client
+ import http.cookies
+ import http.cookiejar
+ import http.server
+
+ import queue
+
+ import reprlib
+
+ import socketserver
+
+ from tkinter import colorchooser
+ from tkinter import commondialog
+ from tkinter import constants
+ from tkinter import dialog
+ from tkinter import dnd
+ from tkinter import filedialog
+ from tkinter import font
+ from tkinter import messagebox
+ from tkinter import scrolledtext
+ from tkinter import simpledialog
+ from tkinter import tix
+ from tkinter import ttk
+
+ import winreg # Windows only
+
+ import xmlrpc.client
+ import xmlrpc.server
+
+ import _dummy_thread
+ import _markupbase
+ import _thread
+
+Note that, as of v0.16.0, ``python-future`` no longer includes an alias for the
+``configparser`` module because a full backport exists (see https://pypi.org/project/configparser/).
+
+.. _list-standard-library-refactored:
+
+Aliased imports
+~~~~~~~~~~~~~~~
+
+The following 14 modules were refactored or extended from Python 2.7 to 3.x
+but were neither renamed in Py3.x nor were the new APIs backported to Py2.x.
+This precludes compatibility interfaces that work out-of-the-box. Instead, the
+``future`` package makes the Python 3.x APIs available on Python 2.x as
+follows::
+
+ from future.standard_library import install_aliases
+ install_aliases()
+
+ from collections import UserDict, UserList, UserString
+
+ import urllib.parse
+ import urllib.request
+ import urllib.response
+ import urllib.robotparser
+ import urllib.error
+
+ import dbm
+ import dbm.dumb
+ import dbm.gnu # requires Python dbm support
+ import dbm.ndbm # requires Python dbm support
+
+ from itertools import filterfalse, zip_longest
+
+ from subprocess import getoutput, getstatusoutput
+
+ from sys import intern
+
+ import test.support
+
+
+The newly exposed ``urllib`` submodules are backports of those from Py3.x.
+This means, for example, that ``urllib.parse.unquote()`` now exists and takes
+an optional ``encoding`` argument on Py2.x as it does on Py3.x.
+
+**Limitation:** Note that the ``http``-based backports do not currently support
+HTTPS (as of 2015-09-11) because the SSL support changed considerably in Python
+3.x. If you need HTTPS support, please use this idiom for now::
+
+ from future.moves.urllib.request import urlopen
+
+Backports also exist of the following features from Python 3.4:
+
+- ``math.ceil`` returns an int on Py3
+- ``collections.ChainMap`` (for 2.7)
+- ``reprlib.recursive_repr`` (for 2.7)
+
+These can then be imported on Python 2.7+ as follows::
+
+ from future.standard_library import install_aliases
+ install_aliases()
+
+ from math import ceil # now returns an int
+ from collections import ChainMap
+ from reprlib import recursive_repr
+
+
+External standard-library backports
+-----------------------------------
+
+Backports of the following modules from the Python 3.x standard library are
+available independently of the python-future project::
+
+ import enum # pip install enum34
+ import singledispatch # pip install singledispatch
+ import pathlib # pip install pathlib
+
+A few modules from Python 3.4 are also available in the ``backports``
+package namespace after ``pip install backports.lzma`` etc.::
+
+ from backports import lzma
+ from backports import functools_lru_cache as lru_cache
+
+
+Included full backports
+-----------------------
+
+Alpha-quality full backports of the following modules from Python 3.3's
+standard library to Python 2.x are also available in ``future.backports``::
+
+ http.client
+ http.server
+ html.entities
+ html.parser
+ urllib
+ xmlrpc.client
+ xmlrpc.server
+
+The goal for these modules, unlike the modules in the ``future.moves`` package
+or top-level namespace, is to backport new functionality introduced in Python
+3.3.
+
+If you need the full backport of one of these packages, please open an issue `here
+<https://github.com/PythonCharmers/python-future/issues>`_.
diff --git a/docs/stdlib_incompatibilities.rst b/docs/stdlib_incompatibilities.rst
index 5da76d4e..e93f96ba 100644
--- a/docs/stdlib_incompatibilities.rst
+++ b/docs/stdlib_incompatibilities.rst
@@ -18,8 +18,7 @@ Here we will attempt to document these, together with known workarounds:
``base64``, ``decodebytes()`` function, :ref:`stdlib-base64-decodebytes`
``re``, ``ASCII`` mode, :ref:`stdlib-re-ASCII`
-To contribute to this, please email the python-porting list or send a
-pull request. See :ref:`contributing`.
+To contribute to this list, please send a pull request. See :ref:`contributing`.
.. _stdlib-array-constructor:
@@ -33,14 +32,14 @@ platform string: unicode string on Python 3, byte string on Python 2.
Python 2::
>>> array.array(b'b')
array.array(b'b')
-
+
>>> array.array(u'u')
TypeError: must be char, not unicode
Python 3::
>>> array.array(b'b')
TypeError: must be a unicode character, not bytes
-
+
>>> array.array(u'b')
array('b')
@@ -54,9 +53,12 @@ You can use the following code on both Python 3 and Python 2::
import array
# ...
-
+
a = array.array(bytes_to_native_str(b'b'))
+This was `fixed in Python 2.7.11
+`_.
+Since then, ``array.array()`` now also accepts unicode format typecode.
.. _stdlib-array-read:
@@ -67,9 +69,9 @@ This method has been removed in Py3. This crops up in e.g. porting ``http.client
.. _stdlib-base64-decodebytes:
-base64.decodebytes()
---------------------
-The ``base64`` module on Py2 has no 'decodebytes'.
+base64.decodebytes() and base64.encodebytes()
+---------------------------------------------
+The ``base64`` module on Py2 has no ``decodebytes`` or ``encodebytes`` functions.
.. _stdlib-re-ASCII:
@@ -88,14 +90,16 @@ This enables 'ASCII mode' for regular expressions (see the docs `here
struct.pack()
-------------
-The :func:`struct.pack` function must take a native string as its format argument. For example::
+Before Python version 2.7.7, the :func:`struct.pack` function
+required a native string as its format argument. For example::
>>> from __future__ import unicode_literals
>>> from struct import pack
- >>> pack('<4H2I', version, rec_type, build, year, file_hist_flags, ver_can_read)
-
-raises ``TypeError: Struct() argument 1 must be string, not unicode`` on Python
-2. To work around this, pass the format string argument as e.g.
-``future.utils.native('<4H2I')``.
+ >>> pack('<4H2I', version, rec_type, build, year, file_hist_flags, ver_can_read)
+raised ``TypeError: Struct() argument 1 must be string, not unicode``.
+This was `fixed in Python 2.7.7
+`_.
+Since then, ``struct.pack()`` now also accepts unicode format
+strings.
diff --git a/docs/str_object.rst b/docs/str_object.rst
index a2251cee..568b897a 100644
--- a/docs/str_object.rst
+++ b/docs/str_object.rst
@@ -14,15 +14,20 @@ There are also other differences, such as the ``repr`` of unicode strings in
Py2 having a ``u'...'`` prefix, versus simply ``'...'``, and the removal of
the :func:`str.decode` method in Py3.
-:mod:`future` contains a backport of the :mod:`str` object from Python 3 which
-inherits from the Python 2 :class:`unicode` class but has customizations to
-improve compatibility with Python 3's :class:`str` object. You can use it as
-follows::
+:mod:`future` contains a :class:`newstr` type that is a backport of the
+:mod:`str` object from Python 3. This inherits from the Python 2
+:class:`unicode` class but has customizations to improve compatibility with
+Python 3's :class:`str` object. You can use it as follows::
>>> from __future__ import unicode_literals
- >>> from future.builtins import str
+ >>> from builtins import str
-(On Py3, this simply imports the builtin :class:`str` object.)
+On Py2, this gives us::
+
+ >>> str
+ future.types.newstr.newstr
+
+(On Py3, it is simply the usual builtin :class:`str` object.)
Then, for example, the following code has the same effect on Py2 as on Py3::
@@ -44,14 +49,14 @@ Then, for example, the following code has the same effect on Py2 as on Py3::
TypeError: argument can't be
Various other operations that mix strings and bytes or other types are
-permitted on Py2 with the :class:`future.builtins.str` class even though they
+permitted on Py2 with the :class:`newstr` class even though they
are illegal with Python 3. For example::
>>> s2 = b'/' + str('ABCD')
>>> s2
'/ABCD'
>>> type(s2)
- future.builtins.backports.newstr.newstr
+ future.types.newstr.newstr
This is allowed for compatibility with parts of the Python 2 standard
library and various third-party libraries that mix byte-strings and unicode
@@ -62,16 +67,16 @@ they are unicode. (See ``posixpath.py``.) Another example is the
.. For example, this is permissible on Py2::
-..
+..
.. >>> u'u' > 10
.. True
-..
+..
.. >>> u'u' <= b'u'
.. True
-..
+..
.. On Py3, these raise TypeErrors.
-In most other ways, these :class:`future.builtins.str` objects on Py2 have the
+In most other ways, these :class:`builtins.str` objects on Py2 have the
same behaviours as Python 3's :class:`str`::
>>> s = str('ABCD')
@@ -79,21 +84,16 @@ same behaviours as Python 3's :class:`str`::
>>> assert list(s) == ['A', 'B', 'C', 'D']
>>> assert s.split('B') == ['A', 'CD']
-.. If you must ensure identical use of (unicode) strings across Py3 and Py2 in a
-.. single-source codebase, you can wrap string literals in a :func:`~str` call, as
-.. follows::
-..
-.. from __future__ import unicode_literals
-.. from future.builtins import *
-..
-.. # ...
-..
-.. s = str('This absolutely must behave like a Py3 string')
-..
-.. # ...
-..
-.. Most of the time this is unnecessary, but the stricter type-checking of the
-.. ``future.builtins.str`` object is useful for ensuring the same consistent
-.. separation between unicode and byte strings on Py2 as on Py3. This is
-.. important when writing protocol handlers, for example.
+The :class:`str` type from :mod:`builtins` also provides support for the
+``surrogateescape`` error handler on Python 2.x. Here is an example that works
+identically on Python 2.x and 3.x::
+
+ >>> from builtins import str
+ >>> s = str(u'\udcff')
+ >>> s.encode('utf-8', 'surrogateescape')
+ b'\xff'
+
+This feature is in alpha. Please leave feedback `here
+<https://github.com/PythonCharmers/python-future/issues>`_ about whether this
+works for you.
diff --git a/docs/translation.rst b/docs/translation.rst
new file mode 100644
index 00000000..632c46b1
--- /dev/null
+++ b/docs/translation.rst
@@ -0,0 +1,112 @@
+.. _translation:
+
+Using Python 2-only dependencies on Python 3
+--------------------------------------------
+
+The ``past`` module provides an experimental ``translation`` package to help
+with importing and using old Python 2 modules in a Python 3 environment.
+
+This is implemented using PEP 302 import hooks together with fixers from
+``lib2to3`` and ``libfuturize`` (included with ``python-future``) that
+attempt to automatically translate Python 2 code to Python 3 code with equivalent
+semantics upon import.
+
+*Note* This feature is still in alpha and needs further development to support a
+full range of real-world Python 2 modules. Also be aware that the API for
+this package might change considerably in later versions.
+
+Here is how to use it::
+
+ $ pip3 install plotrique==0.2.5-7 --no-compile # to ignore SyntaxErrors
+ $ python3
+
+Then pass in a whitelist of module name prefixes to the
+``past.translation.autotranslate()`` function. Example::
+
+ >>> from past.translation import autotranslate
+ >>> autotranslate(['plotrique'])
+ >>> import plotrique
+
+Here is another example::
+
+ >>> from past.translation import install_hooks, remove_hooks
+ >>> install_hooks(['mypy2module'])
+ >>> import mypy2module
+ >>> remove_hooks()
+
+This will translate, import and run Python 2 code such as the following::
+
+ ### File: mypy2module.py
+
+ # Print statements are translated transparently to functions:
+ print 'Hello from a print statement'
+
+ # xrange() is translated to Py3's range():
+ total = 0
+ for i in xrange(10):
+ total += i
+ print 'Total is: %d' % total
+
+ # Dictionary methods like .keys() and .items() are supported and
+ # return lists as on Python 2:
+ d = {'a': 1, 'b': 2}
+ assert d.keys() == ['a', 'b']
+ assert isinstance(d.items(), list)
+
+ # Functions like range, reduce, map, filter also return lists:
+ assert isinstance(range(10), list)
+
+ # The exec statement is supported:
+ exec 'total += 1'
+ print 'Total is now: %d' % total
+
+ # Long integers are supported:
+ k = 1234983424324L
+ print 'k + 1 = %d' % k
+
+ # Most renamed standard library modules are supported:
+ import ConfigParser
+ import HTMLParser
+ import urllib
+
+
+The attributes of the module are then accessible normally from Python 3.
+For example::
+
+ # This Python 3 code works
+ >>> type(mypy2module.d)
+ builtins.dict
+
+This is a standard Python 3 data type, so, when called from Python 3 code,
+``keys()`` returns a view, not a list::
+
+ >>> type(mypy2module.d.keys())
+ builtins.dict_keys
+
+
+.. _translation-limitations:
+
+Known limitations of ``past.translation``
+*****************************************
+
+- It currently requires a newline at the end of the module or it throws a
+ ``ParseError``.
+
+- This only works with pure-Python modules. C extension modules and Cython code
+ are not supported.
+
+- The biggest hurdle to automatic translation is likely to be ambiguity
+ about byte-strings and text (unicode strings) in the Python 2 code. If the
+ ``past.autotranslate`` feature fails because of this, you could try
+ running ``futurize`` over the code and adding a ``b''`` or ``u''`` prefix to
+ the relevant string literals. To convert between byte-strings and text (unicode
+ strings), add an ``.encode`` or ``.decode`` method call. If this succeeds,
+ please push your patches upstream to the package maintainers.
+
+- Otherwise, the source translation feature offered by the ``past.translation``
+ package has similar limitations to the ``futurize`` script (see
+ :ref:`futurize-limitations`). Help developing and testing this feature further
+ would be particularly welcome.
+
+Please report any bugs you find on the ``python-future`` `bug tracker
+<https://github.com/PythonCharmers/python-future/issues>`_.
diff --git a/docs/unicode_literals.rst b/docs/unicode_literals.rst
new file mode 100644
index 00000000..f6eb2839
--- /dev/null
+++ b/docs/unicode_literals.rst
@@ -0,0 +1,197 @@
+.. _unicode-literals:
+
+Should I import unicode_literals?
+---------------------------------
+
+The ``future`` package can be used with or without ``unicode_literals``
+imports.
+
+In general, it is more compelling to use ``unicode_literals`` when
+back-porting new or existing Python 3 code to Python 2/3 than when porting
+existing Python 2 code to 2/3. In the latter case, explicitly marking up all
+unicode string literals with ``u''`` prefixes would help to avoid
+unintentionally changing the existing Python 2 API. However, if changing the
+existing Python 2 API is not a concern, using ``unicode_literals`` may speed up
+the porting process.
+
+This section summarizes the benefits and drawbacks of using
+``unicode_literals``. To avoid confusion, we recommend using
+``unicode_literals`` everywhere across a code-base or not at all, instead of
+turning on for only some modules.
+
+
+
+Benefits
+~~~~~~~~
+
+1. String literals are unicode on Python 3. Making them unicode on Python 2
+ leads to more consistency of your string types across the two
+ runtimes. This can make it easier to understand and debug your code.
+
+2. Code without ``u''`` prefixes is cleaner, one of the claimed advantages
+ of Python 3. Even though some unicode strings would require a function
+ call to invert them to native strings for some Python 2 APIs (see
+ :ref:`stdlib-incompatibilities`), the incidence of these function calls
+ would usually be much lower than the incidence of ``u''`` prefixes for text
+ strings in the absence of ``unicode_literals``.
+
+3. The diff when porting to a Python 2/3-compatible codebase may be smaller,
+ less noisy, and easier to review with ``unicode_literals`` than if an
+ explicit ``u''`` prefix is added to every unadorned string literal.
+
+4. If support for Python 3.2 is required (e.g. for Ubuntu 12.04 LTS or
+ Debian wheezy), ``u''`` prefixes are a ``SyntaxError``, making
+ ``unicode_literals`` the only option for a Python 2/3 compatible
+ codebase. [However, note that ``future`` doesn't support Python 3.0-3.2.]
+
+
+Drawbacks
+~~~~~~~~~
+
+1. Adding ``unicode_literals`` to a module amounts to a "global flag day" for
+ that module, changing the data types of all strings in the module at once.
+ Cautious developers may prefer an incremental approach. (See
+ `here `_ for an excellent article
+ describing the superiority of an incremental patch-set in the case
+ of the Linux kernel.)
+
+.. This is a larger-scale change than adding explicit ``u''`` prefixes to
+.. all strings that should be Unicode.
+
+2. Changing to ``unicode_literals`` will likely introduce regressions on
+ Python 2 that require an initial investment of time to find and fix. The
+ APIs may be changed in subtle ways that are not immediately obvious.
+
+ An example on Python 2::
+
+ ### Module: mypaths.py
+
+ ...
+ def unix_style_path(path):
+ return path.replace('\\', '/')
+ ...
+
+ ### User code:
+
+ >>> path1 = '\\Users\\Ed'
+ >>> unix_style_path(path1)
+ '/Users/Ed'
+
+ On Python 2, adding a ``unicode_literals`` import to ``mypaths.py`` would
+ change the return type of the ``unix_style_path`` function from ``str`` to
+ ``unicode`` in the user code, which is difficult to anticipate and probably
+ unintended.
+
+ The counter-argument is that this code is broken, in a portability
+ sense; we see this from Python 3 raising a ``TypeError`` upon passing the
+ function a byte-string. The code needs to be changed to make explicit
+ whether the ``path`` argument is to be a byte string or a unicode string.
+
+3. With ``unicode_literals`` in effect, there is no way to specify a native
+ string literal (``str`` type on both platforms). This can be worked around as follows::
+
+ >>> from __future__ import unicode_literals
+ >>> ...
+ >>> from future.utils import bytes_to_native_str as n
+
+ >>> s = n(b'ABCD')
+ >>> s
+ 'ABCD' # on both Py2 and Py3
+
+ although this incurs a performance penalty (a function call and, on Py3,
+ a ``decode`` method call.)
+
+ This is a little awkward because various Python library APIs (standard
+ and non-standard) require a native string to be passed on both Py2
+ and Py3. (See :ref:`stdlib-incompatibilities` for some examples. WSGI
+ dictionaries are another.)
+
+4. If a codebase already explicitly marks up all text with ``u''`` prefixes,
+ and if support for Python versions 3.0-3.2 can be dropped, then
+ removing the existing ``u''`` prefixes and replacing these with
+ ``unicode_literals`` imports (the porting approach Django used) would
+ introduce more noise into the patch and make it more difficult to review.
+ However, note that the ``futurize`` script takes advantage of PEP 414 and
+ does not remove explicit ``u''`` prefixes that already exist.
+
+5. Turning on ``unicode_literals`` converts even docstrings to unicode, but
+ Pydoc breaks with unicode docstrings containing non-ASCII characters for
+ Python versions < 2.7.7. (`Fix
+ committed `_ in Jan 2014.)::
+
+ >>> def f():
+ ... u"Author: Martin von Löwis"
+
+ >>> help(f)
+
+ /Users/schofield/Install/anaconda/python.app/Contents/lib/python2.7/pydoc.pyc in pipepager(text, cmd)
+ 1376 pipe = os.popen(cmd, 'w')
+ 1377 try:
+ -> 1378 pipe.write(text)
+ 1379 pipe.close()
+ 1380 except IOError:
+
+ UnicodeEncodeError: 'ascii' codec can't encode character u'\xf6' in position 71: ordinal not in range(128)
+
+See `this Stack Overflow thread
+`_
+for other gotchas.
+
+
+Others' perspectives
+~~~~~~~~~~~~~~~~~~~~
+
+In favour of ``unicode_literals``
+*********************************
+
+Django recommends importing ``unicode_literals`` as its top `porting tip `_ for
+migrating Django extension modules to Python 3. The following `quote
+`_ is
+from Aymeric Augustin on 23 August 2012 regarding why he chose
+``unicode_literals`` for the port of Django to a Python 2/3-compatible
+codebase.:
+
+ "... I'd like to explain why this PEP [PEP 414, which allows explicit
+ ``u''`` prefixes for unicode literals on Python 3.3+] is at odds with
+ the porting philosophy I've applied to Django, and why I would have
+ vetoed taking advantage of it.
+
+ "I believe that aiming for a Python 2 codebase with Python 3
+ compatibility hacks is a counter-productive way to port a project. You
+ end up with all the drawbacks of Python 2 (including the legacy `u`
+ prefixes) and none of the advantages Python 3 (especially the sane
+ string handling).
+
+ "Working to write Python 3 code, with legacy compatibility for Python
+ 2, is much more rewarding. Of course it takes more effort, but the
+ results are much cleaner and much more maintainable. It's really about
+ looking towards the future or towards the past.
+
+ "I understand the reasons why PEP 414 was proposed and why it was
+ accepted. It makes sense for legacy software that is minimally
+ maintained. I hope nobody puts Django in this category!"
+
+
+Against ``unicode_literals``
+****************************
+
+ "There are so many subtle problems that ``unicode_literals`` causes.
+ For instance lots of people accidentally introduce unicode into
+ filenames and that seems to work, until they are using it on a system
+ where there are unicode characters in the filesystem path."
+
+ -- Armin Ronacher
+
+ "+1 from me for avoiding the unicode_literals future, as it can have
+ very strange side effects in Python 2.... This is one of the key
+ reasons I backed Armin's PEP 414."
+
+ -- Nick Coghlan
+
+ "Yeah, one of the nuisances of the WSGI spec is that the header values
+ IIRC are the str or StringType on both py2 and py3. With
+ unicode_literals this causes hard-to-spot bugs, as some WSGI servers
+ might be more tolerant than others, but usually using unicode in python
+ 2 for WSGI headers will cause the response to fail."
+
+ -- Antti Haapala
diff --git a/docs/upgrading.rst b/docs/upgrading.rst
new file mode 100644
index 00000000..0d8afca6
--- /dev/null
+++ b/docs/upgrading.rst
@@ -0,0 +1,12 @@
+.. _upgrading:
+
+Upgrading
+*********
+
+We strive to support compatibility between versions of ``python-future``. Part of this involves keeping around old interfaces and marking them as deprecated for a period to allow projects to transition in a straightforward manner to using the new interfaces.
+
+
+.. _upgrading-to-v0.12:
+
+Upgrading to v0.12
+==================
diff --git a/docs/utilities.rst b/docs/utilities.rst
new file mode 100644
index 00000000..e3f1e9c6
--- /dev/null
+++ b/docs/utilities.rst
@@ -0,0 +1,48 @@
+.. _utilities-guide:
+
+Utilities
+---------
+
+:mod:`future` also provides some useful functions and decorators to ease
+backward compatibility with Py2 in the :mod:`future.utils` and
+:mod:`past.utils` modules. These are a selection of the most useful functions
+from ``six`` and various home-grown Py2/3 compatibility modules from popular
+Python projects, such as Jinja2, Pandas, IPython, and Django. The goal is to
+consolidate these in one place, tested and documented, obviating the need for
+every project to repeat this work.
+
+Examples::
+
+ # Functions like print() expect __str__ on Py2 to return a byte
+ # string. This decorator maps the __str__ to __unicode__ on Py2 and
+ # defines __str__ to encode it as utf-8:
+
+ from future.utils import python_2_unicode_compatible
+
+ @python_2_unicode_compatible
+ class MyClass(object):
+ def __str__(self):
+ return u'Unicode string: \u5b54\u5b50'
+ a = MyClass()
+
+ # This then prints the Chinese characters for Confucius:
+ print(a)
+
+
+ # Iterators on Py3 require a __next__() method, whereas on Py2 this
+ # is called next(). This decorator allows Py3-style iterators to work
+ # identically on Py2:
+
+ @implements_iterator
+ class Upper(object):
+ def __init__(self, iterable):
+ self._iter = iter(iterable)
+ def __next__(self): # note the Py3 interface
+ return next(self._iter).upper()
+ def __iter__(self):
+ return self
+
+ print(list(Upper('hello')))
+ # prints ['H', 'E', 'L', 'L', 'O']
+
+On Python 3 these decorators are no-ops.
diff --git a/docs/what_else.rst b/docs/what_else.rst
index 1afb03fe..51f19869 100644
--- a/docs/what_else.rst
+++ b/docs/what_else.rst
@@ -10,8 +10,8 @@ compatible code.
.. include:: bytes_object.rst
.. include:: str_object.rst
-.. include:: int_object.rst
.. include:: dict_object.rst
+.. include:: int_object.rst
.. include:: isinstance.rst
.. include:: open_function.rst
.. include:: custom_str_methods.rst
@@ -23,4 +23,3 @@ compatible code.
.. include:: metaclasses.rst
..
-
diff --git a/docs/whatsnew.rst b/docs/whatsnew.rst
new file mode 100644
index 00000000..d706b2e5
--- /dev/null
+++ b/docs/whatsnew.rst
@@ -0,0 +1,30 @@
+.. _whats-new:
+
+What's New
+**********
+
+What's new in version 1.0.0 (2024-02-21)
+========================================
+
+The new version number of 1.0.0 indicates that the python-future project, like
+Python 2, is now done.
+
+The most important change in this release is adding support for Python 3.12
+(ba1cc50 and a6222d2 and bcced95).
+
+This release also includes these fixes:
+
+- Small updates to the docs
+- Add SECURITY.md describing security policy (0598d1b)
+- Fix pasteurize: NameError: name 'unicode' is not defined (de68c10)
+- Move CI to GitHub Actions (8cd11e8)
+- Add setuptools to requirements for building docs (0c347ff)
+- Fix typos in docs (350e87a)
+- Make the fix_unpacking fixer more robust (de68c10)
+- Small improvements to shell scripts according to shellcheck (6153844)
+
+
+Previous versions
+=================
+
+See :ref:`whats-old`.
diff --git a/docs/why_python3.rst b/docs/why_python3.rst
index 4992fc1f..a4b535f4 100644
--- a/docs/why_python3.rst
+++ b/docs/why_python3.rst
@@ -15,6 +15,38 @@ Why Python 3?
Unicode representation (PEP 393))
- Exception chaining
+Why are Unicode strings better on Python 3?
+-------------------------------------------
+
+- it is not the default string type (you have to prefix the string
+ with a u to get Unicode);
+
+- it is missing some functionality, e.g. casefold;
+
+- there are two distinct implementations, narrow builds and wide builds;
+
+- wide builds take up to four times more memory per string as needed;
+
+- narrow builds take up to two times more memory per string as needed;
+
+- worse, narrow builds have very naive (possibly even "broken")
+ handling of code points in the Supplementary Multilingual Planes.
+
+The unicode string type in Python 3 is better because:
+
+- it is the default string type;
+
+- it includes more functionality;
+
+- starting in Python 3.3, it gets rid of the distinction between
+ narrow and wide builds;
+
+- which reduces the memory overhead of strings by up to a factor
+ of four in many cases;
+
+- and fixes the issue of SMP code points.
+
+(quote from a mailing list post by Steve D'Aprano on 2014-01-17).
New features
@@ -24,11 +56,11 @@ Standard library:
~~~~~~~~~~~~~~~~~
- SSL contexts in http.client
--
+-
Non-arguments for Python 3
==========================
--
+-
diff --git a/future/__init__.py b/future/__init__.py
deleted file mode 100644
index 7f599b39..00000000
--- a/future/__init__.py
+++ /dev/null
@@ -1,98 +0,0 @@
-"""
-future: Easy, safe support for Python 3/2 compatibility
-=======================================================
-
-``future`` is the missing compatibility layer between Python 3 and Python
-2. It allows you to use a single, clean Python 3.x-compatible codebase to
-support both Python 3 and Python 2 with minimal overhead.
-
-Notable projects that use ``future`` for Python 2/3 compatibility are `Mezzanine `_ and `xlwt-future `_.
-
-It is designed to be used as follows::
-
- from __future__ import (absolute_import, division,
- print_function, unicode_literals)
- from future import *
-
-or explicitly as::
-
- from future.builtins import (bytes, int, range, round, str, super,
- ascii, chr, hex, input, oct, open,
- filter, map, zip)
-
-followed by predominantly standard, idiomatic Python 3 code that then runs
-similarly on Python 2.6/2.7 and Python 3.3+.
-
-The imports have no effect on Python 3. On Python 2, they shadow the
-corresponding builtins, which normally have different semantics on Python 3
-versus 2, to provide their Python 3 semantics.
-
-
-Standard library reorganization
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-``from future import standard_library`` provides a context-manager called
-``enable_hooks`` that installs import hooks (PEP 3108) to allow renamed and
-moved standard library modules to be imported from their new Py3 locations.
-
-
-Automatic conversion
---------------------
-An included script called `futurize
-`_ aids in converting
-code (from either Python 2 or Python 3) to code compatible with both
-platforms. It is similar to ``python-modernize`` but goes further in
-providing Python 3 compatibility through the use of the backported types
-and builtin functions in ``future``.
-
-
-Documentation
--------------
-
-See: http://python-future.org
-
-Also see the docstrings for each of these modules for more info::
-
-- future.standard_library
-- future.builtins
-- future.utils
-
-
-Credits
--------
-
-:Author: Ed Schofield
-:Sponsor: Python Charmers Pty Ltd, Australia, and Python Charmers Pte
- Ltd, Singapore. http://pythoncharmers.com
-:Others: - The backported ``super()`` and ``range()`` functions are
- derived from Ryan Kelly's ``magicsuper`` module and Dan
- Crosta's ``xrange`` module.
- - The ``futurize`` script uses ``lib2to3`` and fixers from
- Joe Amenta's ``lib3to2`` and Armin Ronacher's ``python-modernize``.
- - The ``python_2_unicode_compatible`` decorator is from
- Django. The ``implements_iterator`` and ``with_metaclass``
- decorators are from Jinja2.
- - Documentation is generated using ``sphinx`` and styled using
- ``sphinx-bootstrap-theme``.
-
-
-Licensing
----------
-Copyright 2013-2014 Python Charmers Pty Ltd, Australia.
-The software is distributed under an MIT licence. See LICENSE.txt.
-
-"""
-
-from future import standard_library, utils
-from future.builtins import *
-
-__title__ = 'future'
-__author__ = 'Ed Schofield'
-__license__ = 'MIT'
-__copyright__ = 'Copyright 2014 Python Charmers Pty Ltd'
-__ver_major__ = 0
-__ver_minor__ = 11
-__ver_patch__ = 0
-__ver_sub__ = '-dev'
-__version__ = "%d.%d.%d%s" % (__ver_major__, __ver_minor__,
- __ver_patch__, __ver_sub__)
diff --git a/future/builtins/__init__.py b/future/builtins/__init__.py
deleted file mode 100644
index f0f638cf..00000000
--- a/future/builtins/__init__.py
+++ /dev/null
@@ -1,34 +0,0 @@
-"""
-A module that brings in equivalents of the new and modified Python 3
-builtins into Py2. Has no effect on Py3.
-
-See the docs for these modules for more information::
-
-- future.builtins.iterators
-- future.builtins.backports
-- future.builtins.misc
-- future.builtins.disabled
-
-"""
-
-from future.builtins.iterators import (filter, map, zip)
-from future.builtins.misc import (ascii, chr, hex, input, oct, open)
-from future.builtins.backports import (bytes, dict, int, range, round, str,
- super)
-from future import utils
-
-if not utils.PY3:
- # We only import names that shadow the builtins on Py2. No other namespace
- # pollution on Py2.
-
- # Only shadow builtins on Py2; no new names
- __all__ = ['filter', 'map', 'zip',
- 'ascii', 'chr', 'hex', 'input', 'oct', 'open',
- 'bytes', 'dict', 'int', 'range', 'round', 'str', 'super',
- ]
-
-else:
- # No namespace pollution on Py3
- __all__ = []
-
- # TODO: add 'callable' for Py3.0 and Py3.1?
diff --git a/future/builtins/backports/newbytes.py b/future/builtins/backports/newbytes.py
deleted file mode 100644
index 9162f877..00000000
--- a/future/builtins/backports/newbytes.py
+++ /dev/null
@@ -1,284 +0,0 @@
-"""
-Pure-Python implementation of a Python 3-like bytes object for Python 2.
-
-Why do this? Without it, the Python 2 bytes object is a very, very
-different beast to the Python 3 bytes object. Running the
-test_bytes_from_py33.py script from the Python 3.3 test suite using
-Python 2 with its default str-aliased bytes object (after the appropriate
-import fixes, and using the backported test.support module) yields this:
- ------------------------------------------------------------------
- Ran 203 tests in 0.214s
-
- FAILED (failures=31, errors=55, skipped=1)
- ------------------------------------------------------------------
-when running
-
- $ python -m future.tests.test_bytes_from_py33
-
-"""
-
-from collections import Iterable
-from numbers import Integral
-
-from future.utils import istext, isbytes, PY3, with_metaclass
-from future.builtins.backports import no, issubset
-
-
-_builtin_bytes = bytes
-
-if PY3:
- # We'll probably never use newstr on Py3 anyway...
- unicode = str
-
-
-class BaseNewBytes(type):
- def __instancecheck__(cls, instance):
- return isinstance(instance, _builtin_bytes)
-
-
-class newbytes(with_metaclass(BaseNewBytes, _builtin_bytes)):
- """
- A backport of the Python 3 bytes object to Py2
- """
- def __new__(cls, *args, **kwargs):
- """
- From the Py3 bytes docstring:
-
- bytes(iterable_of_ints) -> bytes
- bytes(string, encoding[, errors]) -> bytes
- bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer
- bytes(int) -> bytes object of size given by the parameter initialized with null bytes
- bytes() -> empty bytes object
-
- Construct an immutable array of bytes from:
- - an iterable yielding integers in range(256)
- - a text string encoded using the specified encoding
- - any object implementing the buffer API.
- - an integer
- """
-
- if len(args) == 0:
- return super(newbytes, cls).__new__(cls)
- # Was: elif isinstance(args[0], newbytes):
- # We use type() instead of the above because we're redefining
- # this to be True for all unicode string subclasses. Warning:
- # This may render newstr un-subclassable.
- elif type(args[0]) == newbytes:
- return args[0]
- elif isinstance(args[0], _builtin_bytes):
- value = args[0]
- elif isinstance(args[0], unicode):
- if 'encoding' not in kwargs:
- raise TypeError('unicode string argument without an encoding')
- ###
- # Was: value = args[0].encode(**kwargs)
- # Python 2.6 string encode() method doesn't take kwargs:
- # Use this instead:
- newargs = [kwargs['encoding']]
- if 'errors' in kwargs:
- newargs.append(kwargs['errors'])
- value = args[0].encode(*newargs)
- ###
- elif isinstance(args[0], Iterable):
- if len(args[0]) == 0:
- # What is this?
- raise ValueError('unknown argument type')
- elif len(args[0]) > 0 and isinstance(args[0][0], Integral):
- # It's a list of integers
- value = b''.join([chr(x) for x in args[0]])
- else:
- raise ValueError('item cannot be interpreted as an integer')
- elif isinstance(args[0], Integral):
- if args[0] < 0:
- raise ValueError('negative count')
- value = b'\x00' * args[0]
- else:
- value = args[0]
- return super(newbytes, cls).__new__(cls, value)
-
- def __repr__(self):
- return 'b' + super(newbytes, self).__repr__()
-
- def __str__(self):
- return 'b' + "'{0}'".format(super(newbytes, self).__str__())
-
- def __getitem__(self, y):
- value = super(newbytes, self).__getitem__(y)
- if isinstance(y, Integral):
- return ord(value)
- else:
- return newbytes(value)
-
- def __getslice__(self, *args):
- return self.__getitem__(slice(*args))
-
- def __contains__(self, key):
- if isinstance(key, int):
- newbyteskey = newbytes([key])
- # Don't use isinstance() here because we only want to catch
- # newbytes, not Python 2 str:
- elif type(key) == newbytes:
- newbyteskey = key
- else:
- newbyteskey = newbytes(key)
- return issubset(list(newbyteskey), list(self))
-
- @no(unicode)
- def __add__(self, other):
- return newbytes(super(newbytes, self).__add__(other))
-
- @no(unicode)
- def __radd__(self, left):
- return newbytes(left) + self
-
- @no(unicode)
- def __mul__(self, other):
- return newbytes(super(newbytes, self).__mul__(other))
-
- @no(unicode)
- def __rmul__(self, other):
- return newbytes(super(newbytes, self).__rmul__(other))
-
- def join(self, iterable_of_bytes):
- errmsg = 'sequence item {0}: expected bytes, {1} found'
- if isbytes(iterable_of_bytes) or istext(iterable_of_bytes):
- raise TypeError(errmsg.format(0, type(iterable_of_bytes)))
- for i, item in enumerate(iterable_of_bytes):
- if istext(item):
- raise TypeError(errmsg.format(i, type(item)))
- return newbytes(super(newbytes, self).join(iterable_of_bytes))
-
- @classmethod
- def fromhex(cls, string):
- # Only on Py2:
- return cls(string.replace(' ', '').decode('hex'))
-
- @no(unicode)
- def find(self, sub, *args):
- return newbytes(super(newbytes, self).find(sub, *args))
-
- @no(unicode)
- def rfind(self, sub, *args):
- return newbytes(super(newbytes, self).rfind(sub, *args))
-
- @no(unicode, (1, 2))
- def replace(self, old, new, *args):
- return newbytes(super(newbytes, self).replace(old, new, *args))
-
- def encode(self, *args):
- raise AttributeError("encode method has been disabled in newbytes")
-
- def decode(self, encoding='utf-8', errors='strict'):
- """
- Returns a newstr (i.e. unicode subclass)
-
- Decode B using the codec registered for encoding. Default encoding
- is 'utf-8'. errors may be given to set a different error
- handling scheme. Default is 'strict' meaning that encoding errors raise
- a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
- as well as any other name registered with codecs.register_error that is
- able to handle UnicodeDecodeErrors.
- """
- from future.builtins.backports.newstr import newstr
- return newstr(super(newbytes, self).decode(encoding, errors))
-
- @no(unicode)
- def startswith(self, prefix, *args):
- return super(newbytes, self).startswith(prefix, *args)
-
- @no(unicode)
- def endswith(self, prefix, *args):
- return super(newbytes, self).endswith(prefix, *args)
-
- @no(unicode)
- def split(self, sep=None, maxsplit=-1):
- # Py2 str.split() takes maxsplit as an optional parameter, not as a
- # keyword argument as in Python 3 bytes.
- parts = super(newbytes, self).split(sep, maxsplit)
- return [newbytes(part) for part in parts]
-
- @no(unicode)
- def rsplit(self, sep=None, maxsplit=-1):
- # Py2 str.rsplit() takes maxsplit as an optional parameter, not as a
- # keyword argument as in Python 3 bytes.
- parts = super(newbytes, self).rsplit(sep, maxsplit)
- return [newbytes(part) for part in parts]
-
- @no(unicode)
- def partition(self, sep):
- parts = super(newbytes, self).partition(sep)
- return tuple(newbytes(part) for part in parts)
-
- @no(unicode)
- def rpartition(self, sep):
- parts = super(newbytes, self).rpartition(sep)
- return tuple(newbytes(part) for part in parts)
-
- @no(unicode)
- def index(self, sub, *args):
- '''
- Returns index of sub in bytes.
- Raises ValueError if byte is not in bytes and TypeError if can't
- be converted bytes or its length is not 1.
- '''
- if isinstance(sub, int):
- if len(args) == 0:
- start, end = 0, len(self)
- elif len(args) == 1:
- start = args[0]
- elif len(args) == 2:
- start, end = args
- else:
- raise TypeError('takes at most 3 arguments')
- return list(self)[start:end].index(sub)
- if not isinstance(sub, bytes):
- try:
- sub = self.__class__(sub)
- except (TypeError, ValueError):
- raise TypeError("can't convert sub to bytes")
- try:
- return super(newbytes, self).index(sub, *args)
- except ValueError:
- raise ValueError('substring not found')
-
- def __eq__(self, other):
- if isinstance(other, _builtin_bytes):
- return super(newbytes, self).__eq__(other)
- else:
- return False
-
- def __ne__(self, other):
- if isinstance(other, _builtin_bytes):
- return super(newbytes, self).__ne__(other)
- else:
- return True
-
- unorderable_err = 'unorderable types: bytes() and {0}'
-
- def __lt__(self, other):
- if not isbytes(other):
- raise TypeError(self.unorderable_err.format(type(other)))
- return super(newbytes, self).__lt__(other)
-
- def __le__(self, other):
- if not isbytes(other):
- raise TypeError(self.unorderable_err.format(type(other)))
- return super(newbytes, self).__le__(other)
-
- def __gt__(self, other):
- if not isbytes(other):
- raise TypeError(self.unorderable_err.format(type(other)))
- return super(newbytes, self).__gt__(other)
-
- def __ge__(self, other):
- if not isbytes(other):
- raise TypeError(self.unorderable_err.format(type(other)))
- return super(newbytes, self).__ge__(other)
-
- def __native__(self):
- # We can't just feed a newbytes object into str(), because
- # newbytes.__str__() returns e.g. "b'blah'", consistent with Py3 bytes.
- return super(newbytes, self).__str__()
-
-
-__all__ = ['newbytes']
diff --git a/future/builtins/backports/newdict.py b/future/builtins/backports/newdict.py
deleted file mode 100644
index 0e46d91f..00000000
--- a/future/builtins/backports/newdict.py
+++ /dev/null
@@ -1,113 +0,0 @@
-"""
-A dict subclass for Python 2 that behaves like Python 3's dict
-
-Example use:
-
->>> from future.builtins import dict
->>> d1 = dict() # instead of {} for an empty dict
->>> d2 = dict(key1='value1', key2='value2')
-
-The keys, values and items methods now return iterators on Python 2.x
-(with set-like behaviour on Python 2.7).
-
->>> for d in (d1, d2):
-... assert not isinstance(d.keys(), list)
-... assert not isinstance(d.values(), list)
-... assert not isinstance(d.items(), list)
-"""
-
-import sys
-
-from future.utils import with_metaclass
-
-
-_builtin_dict = dict
-ver = sys.version_info[:2]
-
-
-class BaseNewDict(type):
- def __instancecheck__(cls, instance):
- return isinstance(instance, _builtin_dict)
-
-class newdict(with_metaclass(BaseNewDict, _builtin_dict)):
- """
- A backport of the Python 3 dict object to Py2
- """
- def items(self):
- """
- On Python 2.7+:
- D.items() -> a set-like object providing a view on D's items
- On Python 2.6:
- D.items() -> an iterator over D's items
- """
- if ver == (2, 7):
- return self.viewitems()
- elif ver == (2, 6):
- return self.iteritems()
- elif ver >= (3, 0):
- return self.items()
-
- def keys(self):
- """
- On Python 2.7+:
- D.keys() -> a set-like object providing a view on D's keys
- On Python 2.6:
- D.keys() -> an iterator over D's keys
- """
- if ver == (2, 7):
- return self.viewkeys()
- elif ver == (2, 6):
- return self.iterkeys()
- elif ver >= (3, 0):
- return self.keys()
-
- def values(self):
- """
- On Python 2.7+:
- D.values() -> a set-like object providing a view on D's values
- On Python 2.6:
- D.values() -> an iterator over D's values
- """
- if ver == (2, 7):
- return self.viewvalues()
- elif ver == (2, 6):
- return self.itervalues()
- elif ver >= (3, 0):
- return self.values()
-
- def __new__(cls, *args, **kwargs):
- """
- dict() -> new empty dictionary
- dict(mapping) -> new dictionary initialized from a mapping object's
- (key, value) pairs
- dict(iterable) -> new dictionary initialized as if via:
- d = {}
- for k, v in iterable:
- d[k] = v
- dict(**kwargs) -> new dictionary initialized with the name=value pairs
- in the keyword argument list. For example: dict(one=1, two=2)
-
- """
-
- if len(args) == 0:
- return super(newdict, cls).__new__(cls)
- # Was: elif isinstance(args[0], newbytes):
- # We use type() instead of the above because we're redefining
- # this to be True for all unicode string subclasses. Warning:
- # This may render newstr un-subclassable.
- elif type(args[0]) == newdict:
- return args[0]
- # elif isinstance(args[0], _builtin_dict):
- # value = args[0]
- else:
- value = args[0]
- return super(newdict, cls).__new__(cls, value)
-
- def __native__(self):
- """
- Hook for the future.utils.native() function
- """
- return super(newbytes, self)
-
-
-__all__ = ['newdict']
diff --git a/future/builtins/backports/newint.py b/future/builtins/backports/newint.py
deleted file mode 100644
index 7efccc9c..00000000
--- a/future/builtins/backports/newint.py
+++ /dev/null
@@ -1,196 +0,0 @@
-"""
-Backport of Python 3's int, based on Py2's long.
-
-They are very similar. The most notable difference is:
-
-- representation: trailing L in Python 2 removed in Python 3
-
-"""
-
-from numbers import Integral
-
-from future.builtins.backports.newbytes import newbytes
-from future.utils import PY3, isint, istext, isbytes, with_metaclass
-
-
-if PY3:
- long = int
-
-
-class BaseNewInt(type):
- def __instancecheck__(cls, instance):
- # Special case for Py2 short or long int
- return isinstance(instance, (int, long))
-
-
-class newint(with_metaclass(BaseNewInt, long)):
- """
- A backport of the Python 3 int object to Py2
- """
- def __new__(cls, x=0, base=10):
- """
- From the Py3 int docstring:
-
- | int(x=0) -> integer
- | int(x, base=10) -> integer
- |
- | Convert a number or string to an integer, or return 0 if no arguments
- | are given. If x is a number, return x.__int__(). For floating point
- | numbers, this truncates towards zero.
- |
- | If x is not a number or if base is given, then x must be a string,
- | bytes, or bytearray instance representing an integer literal in the
- | given base. The literal can be preceded by '+' or '-' and be surrounded
- | by whitespace. The base defaults to 10. Valid bases are 0 and 2-36.
- | Base 0 means to interpret the base from the string as an integer literal.
- | >>> int('0b100', base=0)
- | 4
-
- """
- try:
- val = x.__int__()
- except AttributeError:
- val = x
- else:
- if not isint(val):
- raise TypeError('__int__ returned non-int ({0})'.format(type(val)))
-
- if base != 10:
- # Explicit base
- if not (istext(val) or isbytes(val) or isinstance(val, bytearray)):
- raise TypeError("int() can't convert non-string with explicit base")
- try:
- return super(newint, cls).__new__(cls, val, base)
- except TypeError:
- return super(newint, cls).__new__(cls, newbytes(val), base)
- # After here, base is 10
- try:
- return super(newint, cls).__new__(cls, val)
- except TypeError:
- # Py2 long doesn't handle bytearray input with an explicit base, so
- # handle this here.
- # Py3: int(bytearray(b'10'), 2) == 2
- # Py2: int(bytearray(b'10'), 2) == 2 raises TypeError
- # Py2: long(bytearray(b'10'), 2) == 2 raises TypeError
- try:
- return super(newint, cls).__new__(cls, newbytes(val))
- except:
- raise TypeError("newint argument must be a string or a number, not '{0}'".format(
- type(val)))
-
-
- def __repr__(self):
- """
- Without the L suffix
- """
- value = super(newint, self).__repr__()
- assert value[-1] == 'L'
- return value[:-1]
-
- def __add__(self, other):
- return newint(super(newint, self).__add__(other))
-
- def __radd__(self, other):
- return newint(super(newint, self).__radd__(other))
-
- def __sub__(self, other):
- return newint(super(newint, self).__sub__(other))
-
- def __rsub__(self, other):
- return newint(super(newint, self).__rsub__(other))
-
- def __mul__(self, other):
- value = super(newint, self).__mul__(other)
- if isint(value):
- return newint(value)
- return value
-
- def __rmul__(self, other):
- value = super(newint, self).__rmul__(other)
- if isint(value):
- return newint(value)
- return value
-
- def __div__(self, other):
- return newint(super(newint, self).__div__(other))
-
- def __rdiv__(self, other):
- return newint(super(newint, self).__rdiv__(other))
-
- def __floordiv__(self, other):
- return newint(super(newint, self).__floordiv__(other))
-
- def __rfloordiv__(self, other):
- return newint(super(newint, self).__rfloordiv__(other))
-
- def __mod__(self, other):
- return newint(super(newint, self).__mod__(other))
-
- def __rmod__(self, other):
- return newint(super(newint, self).__rmod__(other))
-
- def __divmod__(self, other):
- result = super(newint, self).__divmod__(other)
- return (newint(result[0]), newint(result[1]))
-
- def __rdivmod__(self, other):
- result = super(newint, self).__rdivmod__(other)
- return (newint(result[0]), newint(result[1]))
-
- def __pow__(self, other):
- return newint(super(newint, self).__pow__(other))
-
- def __rpow__(self, other):
- return newint(super(newint, self).__rpow__(other))
-
- def __lshift__(self, other):
- return newint(super(newint, self).__lshift__(other))
-
- def __rlshift__(self, other):
- return newint(super(newint, self).__lshift__(other))
-
- def __rshift__(self, other):
- return newint(super(newint, self).__rshift__(other))
-
- def __rrshift__(self, other):
- return newint(super(newint, self).__rshift__(other))
-
- def __and__(self, other):
- return newint(super(newint, self).__and__(other))
-
- def __rand__(self, other):
- return newint(super(newint, self).__rand__(other))
-
- def __or__(self, other):
- return newint(super(newint, self).__or__(other))
-
- def __ror__(self, other):
- return newint(super(newint, self).__ror__(other))
-
- def __xor__(self, other):
- return newint(super(newint, self).__xor__(other))
-
- def __rxor__(self, other):
- return newint(super(newint, self).__rxor__(other))
-
- # __radd__(self, other) __rsub__(self, other) __rmul__(self, other) __rdiv__(self, other) __rtruediv__(self, other) __rfloordiv__(self, other) __rmod__(self, other) __rdivmod__(self, other) __rpow__(self, other) __rlshift__(self, other) __rrshift__(self, other) __rand__(self, other) __rxor__(self, other) __ror__(self, other)
-
- # __iadd__(self, other) __isub__(self, other) __imul__(self, other) __idiv__(self, other) __itruediv__(self, other) __ifloordiv__(self, other) __imod__(self, other) __ipow__(self, other, [modulo]) __ilshift__(self, other) __irshift__(self, other) __iand__(self, other) __ixor__(self, other) __ior__(self, other)
-
- def __neg__(self):
- return newint(super(newint, self).__neg__())
-
- def __pos__(self):
- return newint(super(newint, self).__pos__())
-
- def __abs__(self):
- return newint(super(newint, self).__abs__())
-
- def __invert__(self):
- return newint(super(newint, self).__invert__())
-
- def __native__(self):
- return long(self)
-
-
-__all__ = ['newint']
diff --git a/future/builtins/backports/newround.py b/future/builtins/backports/newround.py
deleted file mode 100644
index 4287afff..00000000
--- a/future/builtins/backports/newround.py
+++ /dev/null
@@ -1,48 +0,0 @@
-"""
-``python-future``: pure Python implementation of Python 3 round().
-"""
-
-from future.utils import PYPY
-
-
-def newround(number, ndigits=None):
- """
- See Python 3 documentation: uses Banker's Rounding.
-
- Delegates to the __round__ method if for some reason this exists.
-
- If not, rounds a number to a given precision in decimal digits (default
- 0 digits). This returns an int when called with one argument,
- otherwise the same type as the number. ndigits may be negative.
-
- See the test_round method in future/tests/test_builtins.py for
- examples.
- """
- return_int = False
- if ndigits is None:
- return_int = True
- ndigits = 0
- if hasattr(number, '__round__'):
- return number.__round__(ndigits)
-
- # Use the decimal module for simplicity of implementation (and
- # hopefully correctness).
- from decimal import Decimal, ROUND_HALF_EVEN
-
- if ndigits < 0:
- raise NotImplementedError('negative ndigits not supported yet')
- exponent = Decimal('10') ** (-ndigits)
-
- if PYPY:
- # Work around issue #24: round() breaks on PyPy with NumPy's types
- if 'numpy' in repr(type(number)):
- number = float(number)
- d = Decimal.from_float(number).quantize(exponent,
- rounding=ROUND_HALF_EVEN)
- if return_int:
- return int(d)
- else:
- return float(d)
-
-
-__all__ = ['newround']
diff --git a/future/builtins/misc.py b/future/builtins/misc.py
deleted file mode 100644
index 46b6d472..00000000
--- a/future/builtins/misc.py
+++ /dev/null
@@ -1,79 +0,0 @@
-"""
-A module that brings in equivalents of the new and modified Python 3
-builtins into Py2. Has no effect on Py3.
-
-The builtin functions are:
-
-- ``ascii`` (from Py2's future_builtins module)
-- ``hex`` (from Py2's future_builtins module)
-- ``oct`` (from Py2's future_builtins module)
-- ``chr`` (equivalent to ``unichr`` on Py2)
-- ``input`` (equivalent to ``raw_input`` on Py2)
-- ``open`` (equivalent to io.open on Py2)
-
-
-input()
--------
-Like the new ``input()`` function from Python 3 (without eval()), except
-that it returns bytes. Equivalent to Python 2's ``raw_input()``.
-
-Warning: By default, importing this module *removes* the old Python 2
-input() function entirely from ``__builtin__`` for safety. This is
-because forgetting to import the new ``input`` from ``future`` might
-otherwise lead to a security vulnerability (shell injection) on Python 2.
-
-To restore it, you can retrieve it yourself from
-``__builtin__._old_input``.
-
-Fortunately, ``input()`` seems to be seldom used in the wild in Python
-2...
-
-"""
-
-from future.builtins.backports.newint import newint
-from future import utils
-
-
-if utils.PY2:
- from io import open
- from future_builtins import ascii, oct, hex
- from __builtin__ import unichr as chr
- import __builtin__
-
- # The following seems like a good idea, but it may be a bit
- # paranoid and the implementation may be fragile:
-
- # Python 2's input() is unsafe and MUST not be able to be used
- # accidentally by someone who expects Python 3 semantics but forgets
- # to import it on Python 2. So we delete it from __builtin__. We
- # keep a copy though:
- __builtin__._oldinput = __builtin__.input
- delattr(__builtin__, 'input')
-
- input = raw_input
-
- # In case some code wants to import 'callable' portably from Py3.0/3.1:
- callable = __builtin__.callable
-
- __all__ = ['ascii', 'chr', 'hex', 'input', 'oct', 'open']
-
-else:
- import builtins
- ascii = builtins.ascii
- chr = builtins.chr
- hex = builtins.hex
- input = builtins.input
- oct = builtins.oct
- open = builtins.open
-
- __all__ = []
-
- # From Pandas, for Python versions 3.0 and 3.1 only. The callable()
- # function was removed from Py3.0 and 3.1 and reintroduced into Py3.2.
- try:
- # callable reintroduced in later versions of Python
- callable = builtins.callable
- except AttributeError:
- def callable(obj):
- return any("__call__" in klass.__dict__ for klass in type(obj).__mro__)
- __all__.append('callable')
diff --git a/future/standard_library/__init__.py b/future/standard_library/__init__.py
deleted file mode 100644
index df793111..00000000
--- a/future/standard_library/__init__.py
+++ /dev/null
@@ -1,441 +0,0 @@
-"""
-Python 3 reorganized the standard library (PEP 3108). This module exposes
-several standard library modules to Python 2 under their new Python 3
-names.
-
-It is designed to be used as follows::
-
- from future import standard_library
-
-And then these normal Py3 imports work on both Py3 and Py2::
-
- import builtins
- import configparser
- import copyreg
- import queue
- import reprlib
- import socketserver
- import winreg # on Windows only
- import test.support
- import html, html.parser, html.entites
- import http, http.client, http.server
- import _thread
- import _dummythread
- import _markupbase
-
- from itertools import filterfalse, zip_longest
- from sys import intern
-
-(The renamed modules and functions are still available under their old
-names on Python 2.)
-
-To turn off the import hooks, use::
-
- standard_library.remove_hooks()
-
-and to turn it on again, use::
-
- standard_library.install_hooks()
-
-This is a cleaner alternative to this idiom (see
-http://docs.pythonsprints.com/python3_porting/py-porting.html)::
-
- try:
- import queue
- except ImportError:
- import Queue as queue
-
-
-Limitations
------------
-We don't currently support these modules, but would like to::
-
- import http.cookies, http.cookiejar
- import dbm
- import dbm.dumb
- import dbm.gnu
- import xmlrpc.client
- import collections.abc # on Py33
- import urllib.request
- import urllib.parse
- import urllib.error
- import urllib.robotparser
- import tkinter
- import pickle # should (optionally) bring in cPickle on Python 2
-
-
-Notes
------
-This module only supports Python 2.6, Python 2.7, and Python 3.1+.
-
-The following renames are already supported on Python 2.7 without any
-additional work from us::
-
- reload() -> imp.reload()
- reduce() -> functools.reduce()
- StringIO.StringIO -> io.StringIO
- Bytes.BytesIO -> io.BytesIO
-
-Old things that can one day be fixed automatically by futurize.py::
-
- string.uppercase -> string.ascii_uppercase # works on either Py2.7 or Py3+
- sys.maxint -> sys.maxsize # but this isn't identical
-
-TODO: Check out these:
-Not available on Py2.6:
- unittest2 -> unittest?
- buffer -> memoryview?
-
-"""
-
-from __future__ import absolute_import
-
-import sys
-import logging
-import imp
-import contextlib
-
-from future import utils
-
-# The modules that are defined under the same names on Py3 but with
-# different contents in a significant way (e.g. submodules) are:
-# pickle (fast one)
-# dbm
-# urllib
-# test
-
-# These ones are new (i.e. no problem)
-# http
-# html
-# tkinter
-# xmlrpc
-
-# These modules need names from elsewhere being added to them:
-# subprocess: should provide getoutput and other fns from commands
-# module but these fns are missing: getstatus, mk2arg,
-# mkarg
-
-# Old to new
-# etc: see lib2to3/fixes/fix_imports.py
-RENAMES = {
- # 'cStringIO': 'io', # there's a new io module in Python 2.6
- # that provides StringIO and BytesIO
- # 'StringIO': 'io', # ditto
- # 'cPickle': 'pickle',
- '__builtin__': 'builtins',
- 'copy_reg': 'copyreg',
- 'Queue': 'queue',
- 'future.standard_library.socketserver': 'socketserver',
- 'ConfigParser': 'configparser',
- 'repr': 'reprlib',
- # 'FileDialog': 'tkinter.filedialog',
- # 'tkFileDialog': 'tkinter.filedialog',
- # 'SimpleDialog': 'tkinter.simpledialog',
- # 'tkSimpleDialog': 'tkinter.simpledialog',
- # 'tkColorChooser': 'tkinter.colorchooser',
- # 'tkCommonDialog': 'tkinter.commondialog',
- # 'Dialog': 'tkinter.dialog',
- # 'Tkdnd': 'tkinter.dnd',
- # 'tkFont': 'tkinter.font',
- # 'tkMessageBox': 'tkinter.messagebox',
- # 'ScrolledText': 'tkinter.scrolledtext',
- # 'Tkconstants': 'tkinter.constants',
- # 'Tix': 'tkinter.tix',
- # 'ttk': 'tkinter.ttk',
- # 'Tkinter': 'tkinter',
- '_winreg': 'winreg',
- 'thread': '_thread',
- 'dummy_thread': '_dummy_thread',
- # 'anydbm': 'dbm', # causes infinite import loop
- # 'whichdb': 'dbm', # causes infinite import loop
- # anydbm and whichdb are handled by fix_imports2
- # 'dbhash': 'dbm.bsd',
- # 'dumbdbm': 'dbm.dumb',
- # 'dbm': 'dbm.ndbm',
- # 'gdbm': 'dbm.gnu',
- # 'xmlrpclib': 'xmlrpc.client',
- # 'DocXMLRPCServer': 'xmlrpc.server',
- # 'SimpleXMLRPCServer': 'xmlrpc.server',
- # 'httplib': 'http.client',
- # 'htmlentitydefs' : 'html.entities',
- # 'HTMLParser' : 'html.parser',
- # 'Cookie': 'http.cookies',
- # 'cookielib': 'http.cookiejar',
- # 'BaseHTTPServer': 'http.server',
- # 'SimpleHTTPServer': 'http.server',
- # 'CGIHTTPServer': 'http.server',
- 'future.standard_library.test': 'test', # primarily for renaming test_support to support
- # 'commands': 'subprocess',
- # 'urlparse' : 'urllib.parse',
- # 'robotparser' : 'urllib.robotparser',
- # 'abc': 'collections.abc', # for Py33
- 'future.standard_library.html': 'html',
- 'future.standard_library.http': 'http',
- # 'future.standard_library.urllib': 'newurllib',
- 'future.standard_library._markupbase': '_markupbase',
- }
-
-
-REPLACED_MODULES = set(['test', 'urllib', 'pickle']) # add dbm when we support it
-# These are entirely new to Python 2.x, so they cause no potential clashes
-# xmlrpc, tkinter, http, html
-
-
-class WarnOnImport(object):
- def __init__(self, *args):
- self.module_names = args
-
- def find_module(self, fullname, path=None):
- if fullname in self.module_names:
- self.path = path
- return self
- return None
-
- def load_module(self, name):
- if name in sys.modules:
- return sys.modules[name]
- module_info = imp.find_module(name, self.path)
- module = imp.load_module(name, *module_info)
- sys.modules[name] = module
-
- logging.warning("Imported deprecated module %s", name)
- return module
-
-
-class RenameImport(object):
- """
- A class for import hooks mapping Py3 module names etc. to the Py2 equivalents.
- """
- # Different RenameImport classes are created when importing this module from
- # different source files. This causes isinstance(hook, RenameImport) checks
- # to produce inconsistent results. We add this RENAMER attribute here so
- # remove_hooks() and install_hooks() can find instances of these classes
- # easily:
- RENAMER = True
-
- def __init__(self, old_to_new):
- '''
- Pass in a dictionary-like object mapping from old names to new
- names. E.g. {'ConfigParser': 'configparser', 'cPickle': 'pickle'}
- '''
- self.old_to_new = old_to_new
- both = set(old_to_new.keys()) & set(old_to_new.values())
- assert (len(both) == 0 and
- len(set(old_to_new.values())) == len(old_to_new.values())), \
- 'Ambiguity in renaming (handler not implemented)'
- self.new_to_old = dict((new, old) for (old, new) in old_to_new.items())
-
- def find_module(self, fullname, path=None):
- # Handles hierarchical importing: package.module.module2
- new_base_names = set([s.split('.')[0] for s in self.new_to_old])
- if fullname in set(self.old_to_new) | new_base_names:
- return self
- return None
-
- def load_module(self, name):
- path = None
- if name in sys.modules:
- return sys.modules[name]
- elif name in self.new_to_old:
- # New name. Look up the corresponding old (Py2) name:
- name = self.new_to_old[name]
- with suspend_hooks():
- module = self._find_and_load_module(name)
- sys.modules[name] = module
- return module
-
- def _find_and_load_module(self, name, path=None):
- """
- Finds and loads it. But if there's a . in the name, handles it
- properly.
- """
- bits = name.split('.')
- while len(bits) > 1:
- # Treat the first bit as a package
- packagename = bits.pop(0)
- package = self._find_and_load_module(packagename, path)
- path = package.__path__
- name = bits[0]
- module_info = imp.find_module(name, path)
- return imp.load_module(name, *module_info)
-
-
-# (New module name, new object name, old module name, old object name)
-MOVES = [('collections', 'UserList', 'UserList', 'UserList'),
- ('collections', 'UserDict', 'UserDict', 'UserDict'),
- ('collections', 'UserString','UserString', 'UserString'),
- ('itertools', 'filterfalse','itertools', 'ifilterfalse'),
- ('itertools', 'zip_longest','itertools', 'izip_longest'),
- ('sys', 'intern','__builtin__', 'intern'),
- # urllib._urlopener urllib.request
- # urllib.ContentTooShortError urllib.error
- # urllib.FancyURLOpener urllib.request
- # urllib.pathname2url urllib.request
- # urllib.quote urllib.parse
- # urllib.quote_plus urllib.parse
- # urllib.splitattr urllib.parse
- # urllib.splithost urllib.parse
- # urllib.splitnport urllib.parse
- # urllib.splitpasswd urllib.parse
- # urllib.splitport urllib.parse
- # urllib.splitquery urllib.parse
- # urllib.splittag urllib.parse
- # urllib.splittype urllib.parse
- # urllib.splituser urllib.parse
- # urllib.splitvalue urllib.parse
- # urllib.unquote urllib.parse
- # urllib.unquote_plus urllib.parse
- # urllib.urlcleanup urllib.request
- # urllib.urlencode urllib.parse
- # urllib.urlopen urllib.request
- # urllib.URLOpener urllib.request
- # urllib.urlretrieve urllib.request
- # urllib2.AbstractBasicAuthHandler urllib.request
- # urllib2.AbstractDigestAuthHandler urllib.request
- # urllib2.BaseHandler urllib.request
- # urllib2.build_opener urllib.request
- # urllib2.CacheFTPHandler urllib.request
- # urllib2.FileHandler urllib.request
- # urllib2.FTPHandler urllib.request
- # urllib2.HTTPBasicAuthHandler urllib.request
- # urllib2.HTTPCookieProcessor urllib.request
- # urllib2.HTTPDefaultErrorHandler urllib.request
- # urllib2.HTTPDigestAuthHandler urllib.request
- # urllib2.HTTPError urllib.request
- # urllib2.HTTPHandler urllib.request
- # urllib2.HTTPPasswordMgr urllib.request
- # urllib2.HTTPPasswordMgrWithDefaultRealm urllib.request
- # urllib2.HTTPRedirectHandler urllib.request
- # urllib2.HTTPSHandler urllib.request
- # urllib2.install_opener urllib.request
- # urllib2.OpenerDirector urllib.request
- # urllib2.ProxyBasicAuthHandler urllib.request
- # urllib2.ProxyDigestAuthHandler urllib.request
- # urllib2.ProxyHandler urllib.request
- # urllib2.Request urllib.request
- # urllib2.UnknownHandler urllib.request
- # urllib2.URLError urllib.request
- # urllib2.urlopen urllib.request
- # urlparse.parse_qs urllib.parse
- # urlparse.parse_qsl urllib.parse
- # urlparse.urldefrag urllib.parse
- # urlparse.urljoin urllib.parse
- # urlparse.urlparse urllib.parse
- # urlparse.urlsplit urllib.parse
- # urlparse.urlunparse urllib.parse
- # urlparse.urlunsplit urllib.parse
- ]
-
-
-class enable_hooks(object):
- """
- Acts as a context manager. Use like this:
-
- >>> from future import standard_library
- >>> with standard_library.enable_hooks():
- ... import http.client
- >>> import requests # incompatible with ``future``'s standard library hooks
- """
- def __enter__(self):
- print('Entering CM')
- self.hooks_were_installed = detect_hooks()
- install_hooks()
- return self
-
- def __exit__(self, *args):
- print('Exiting CM')
- if not self.hooks_were_installed:
- remove_hooks()
-
-
-class suspend_hooks(object):
- """
- Acts as a context manager. Use like this:
-
- >>> from future import standard_library
- >>> standard_library.install_hooks()
- >>> import http.client
- >>> # ...
- >>> with standard_library.suspend_hooks():
- >>> import requests # incompatible with ``future``'s standard library hooks
-
- If the hooks were disabled before the context, they are not installed when
- the context is left.
- """
- def __enter__(self):
- self.hooks_were_installed = detect_hooks()
- remove_hooks()
- return self
- def __exit__(self, *args):
- if not self.hooks_were_installed:
- install_hooks()
-
-
-def install_hooks():
- print('sys.meta_path was: {}'.format(sys.meta_path))
- print('Installing hooks ...')
- if utils.PY3:
- return
- for (newmodname, newobjname, oldmodname, oldobjname) in MOVES:
- newmod = __import__(newmodname)
- oldmod = __import__(oldmodname)
- obj = getattr(oldmod, oldobjname)
- setattr(newmod, newobjname, obj)
-
- # Add it unless it's there already
- newhook = RenameImport(RENAMES)
- if not detect_hooks():
- sys.meta_path.append(newhook)
- print('sys.meta_path is now: {}'.format(sys.meta_path))
-
-
-def remove_hooks():
- """
- Use to remove the ``future.standard_library`` import hooks.
- """
- print('sys.meta_path was: {}'.format(sys.meta_path))
- print('Uninstalling hooks ...')
- if not utils.PY3:
- # Loop backwards, so deleting items keeps the ordering:
- for i, hook in list(enumerate(sys.meta_path))[::-1]:
- if hasattr(hook, 'RENAMER'):
- del sys.meta_path[i]
- print('sys.meta_path is now: {}'.format(sys.meta_path))
-
-
-def disable_hooks():
- """
- Deprecated. Use remove_hooks() instead. This will be removed by
- ``future`` v1.0.
- """
- remove_hooks()
-
-
-def detect_hooks():
- """
- Returns True if the import hooks are installed, False if not.
- """
- print('Detecting hooks ...')
- present = any([hasattr(hook, 'RENAMER') for hook in sys.meta_path])
- if present:
- print('Detected.')
- else:
- print('Not detected.')
- return present
-
-
-# Now import the modules:
-# with enable_hooks():
-# for (oldname, newname) in RENAMES.items():
-# if newname == 'winreg' and sys.platform not in ['win32', 'win64']:
-# continue
-# if newname in REPLACED_MODULES:
-# # Skip this check for e.g. the stdlib's ``test`` module,
-# # which we have replaced completely.
-# continue
-# newmod = __import__(newname)
-# globals()[newname] = newmod
-
-
-if not utils.PY3:
- install_hooks()
diff --git a/future/standard_library/test/regrtest.py b/future/standard_library/test/regrtest.py
deleted file mode 100755
index 26f27ff3..00000000
--- a/future/standard_library/test/regrtest.py
+++ /dev/null
@@ -1,1564 +0,0 @@
-#! /usr/bin/python2.7
-
-"""
-Usage:
-
-python -m test.regrtest [options] [test_name1 [test_name2 ...]]
-python path/to/Lib/test/regrtest.py [options] [test_name1 [test_name2 ...]]
-
-
-If no arguments or options are provided, finds all files matching
-the pattern "test_*" in the Lib/test subdirectory and runs
-them in alphabetical order (but see -M and -u, below, for exceptions).
-
-For more rigorous testing, it is useful to use the following
-command line:
-
-python -E -tt -Wd -3 -m test.regrtest [options] [test_name1 ...]
-
-
-Options:
-
--h/--help -- print this text and exit
-
-Verbosity
-
--v/--verbose -- run tests in verbose mode with output to stdout
--w/--verbose2 -- re-run failed tests in verbose mode
--W/--verbose3 -- re-run failed tests in verbose mode immediately
--q/--quiet -- no output unless one or more tests fail
--S/--slow -- print the slowest 10 tests
- --header -- print header with interpreter info
-
-Selecting tests
-
--r/--randomize -- randomize test execution order (see below)
- --randseed -- pass a random seed to reproduce a previous random run
--f/--fromfile -- read names of tests to run from a file (see below)
--x/--exclude -- arguments are tests to *exclude*
--s/--single -- single step through a set of tests (see below)
--u/--use RES1,RES2,...
- -- specify which special resource intensive tests to run
--M/--memlimit LIMIT
- -- run very large memory-consuming tests
-
-Special runs
-
--l/--findleaks -- if GC is available detect tests that leak memory
--L/--runleaks -- run the leaks(1) command just before exit
--R/--huntrleaks RUNCOUNTS
- -- search for reference leaks (needs debug build, v. slow)
--j/--multiprocess PROCESSES
- -- run PROCESSES processes at once
--T/--coverage -- turn on code coverage tracing using the trace module
--D/--coverdir DIRECTORY
- -- Directory where coverage files are put
--N/--nocoverdir -- Put coverage files alongside modules
--t/--threshold THRESHOLD
- -- call gc.set_threshold(THRESHOLD)
--F/--forever -- run the specified tests in a loop, until an error happens
-
-
-Additional Option Details:
-
--r randomizes test execution order. You can use --randseed=int to provide a
-int seed value for the randomizer; this is useful for reproducing troublesome
-test orders.
-
--s On the first invocation of regrtest using -s, the first test file found
-or the first test file given on the command line is run, and the name of
-the next test is recorded in a file named pynexttest. If run from the
-Python build directory, pynexttest is located in the 'build' subdirectory,
-otherwise it is located in tempfile.gettempdir(). On subsequent runs,
-the test in pynexttest is run, and the next test is written to pynexttest.
-When the last test has been run, pynexttest is deleted. In this way it
-is possible to single step through the test files. This is useful when
-doing memory analysis on the Python interpreter, which process tends to
-consume too many resources to run the full regression test non-stop.
-
--f reads the names of tests from the file given as f's argument, one
-or more test names per line. Whitespace is ignored. Blank lines and
-lines beginning with '#' are ignored. This is especially useful for
-whittling down failures involving interactions among tests.
-
--L causes the leaks(1) command to be run just before exit if it exists.
-leaks(1) is available on Mac OS X and presumably on some other
-FreeBSD-derived systems.
-
--R runs each test several times and examines sys.gettotalrefcount() to
-see if the test appears to be leaking references. The argument should
-be of the form stab:run:fname where 'stab' is the number of times the
-test is run to let gettotalrefcount settle down, 'run' is the number
-of times further it is run and 'fname' is the name of the file the
-reports are written to. These parameters all have defaults (5, 4 and
-"reflog.txt" respectively), and the minimal invocation is '-R :'.
-
--M runs tests that require an exorbitant amount of memory. These tests
-typically try to ascertain containers keep working when containing more than
-2 billion objects, which only works on 64-bit systems. There are also some
-tests that try to exhaust the address space of the process, which only makes
-sense on 32-bit systems with at least 2Gb of memory. The passed-in memlimit,
-which is a string in the form of '2.5Gb', determines howmuch memory the
-tests will limit themselves to (but they may go slightly over.) The number
-shouldn't be more memory than the machine has (including swap memory). You
-should also keep in mind that swap memory is generally much, much slower
-than RAM, and setting memlimit to all available RAM or higher will heavily
-tax the machine. On the other hand, it is no use running these tests with a
-limit of less than 2.5Gb, and many require more than 20Gb. Tests that expect
-to use more than memlimit memory will be skipped. The big-memory tests
-generally run very, very long.
-
--u is used to specify which special resource intensive tests to run,
-such as those requiring large file support or network connectivity.
-The argument is a comma-separated list of words indicating the
-resources to test. Currently only the following are defined:
-
- all - Enable all special resources.
-
- audio - Tests that use the audio device. (There are known
- cases of broken audio drivers that can crash Python or
- even the Linux kernel.)
-
- curses - Tests that use curses and will modify the terminal's
- state and output modes.
-
- largefile - It is okay to run some test that may create huge
- files. These tests can take a long time and may
- consume >2GB of disk space temporarily.
-
- network - It is okay to run tests that use external network
- resource, e.g. testing SSL support for sockets.
-
- bsddb - It is okay to run the bsddb testsuite, which takes
- a long time to complete.
-
- decimal - Test the decimal module against a large suite that
- verifies compliance with standards.
-
- cpu - Used for certain CPU-heavy tests.
-
- subprocess Run all tests for the subprocess module.
-
- urlfetch - It is okay to download files required on testing.
-
- gui - Run tests that require a running GUI.
-
- xpickle - Test pickle and cPickle against Python 2.4, 2.5 and 2.6 to
- test backwards compatibility. These tests take a long time
- to run.
-
-To enable all resources except one, use '-uall,-'. For
-example, to run all the tests except for the bsddb tests, give the
-option '-uall,-bsddb'.
-"""
-
-from __future__ import print_function
-
-import StringIO
-import getopt
-import json
-import os
-import random
-import re
-import shutil
-import sys
-import time
-import traceback
-import warnings
-import unittest
-import tempfile
-import imp
-import platform
-import sysconfig
-
-
-# Some times __path__ and __file__ are not absolute (e.g. while running from
-# Lib/) and, if we change the CWD to run the tests in a temporary dir, some
-# imports might fail. This affects only the modules imported before os.chdir().
-# These modules are searched first in sys.path[0] (so '' -- the CWD) and if
-# they are found in the CWD their __file__ and __path__ will be relative (this
-# happens before the chdir). All the modules imported after the chdir, are
-# not found in the CWD, and since the other paths in sys.path[1:] are absolute
-# (site.py absolutize them), the __file__ and __path__ will be absolute too.
-# Therefore it is necessary to absolutize manually the __file__ and __path__ of
-# the packages to prevent later imports to fail when the CWD is different.
-for module in sys.modules.itervalues():
- if hasattr(module, '__path__'):
- module.__path__ = [os.path.abspath(path) for path in module.__path__]
- if hasattr(module, '__file__'):
- module.__file__ = os.path.abspath(module.__file__)
-
-
-# MacOSX (a.k.a. Darwin) has a default stack size that is too small
-# for deeply recursive regular expressions. We see this as crashes in
-# the Python test suite when running test_re.py and test_sre.py. The
-# fix is to set the stack limit to 2048.
-# This approach may also be useful for other Unixy platforms that
-# suffer from small default stack limits.
-if sys.platform == 'darwin':
- try:
- import resource
- except ImportError:
- pass
- else:
- soft, hard = resource.getrlimit(resource.RLIMIT_STACK)
- newsoft = min(hard, max(soft, 1024*2048))
- resource.setrlimit(resource.RLIMIT_STACK, (newsoft, hard))
-
-# Test result constants.
-PASSED = 1
-FAILED = 0
-ENV_CHANGED = -1
-SKIPPED = -2
-RESOURCE_DENIED = -3
-INTERRUPTED = -4
-
-from test import test_support
-
-RESOURCE_NAMES = ('audio', 'curses', 'largefile', 'network', 'bsddb',
- 'decimal', 'cpu', 'subprocess', 'urlfetch', 'gui',
- 'xpickle')
-
-TEMPDIR = os.path.abspath(tempfile.gettempdir())
-
-
-def usage(code, msg=''):
- print(__doc__)
- if msg: print(msg)
- sys.exit(code)
-
-
-def main(tests=None, testdir=None, verbose=0, quiet=False,
- exclude=False, single=False, randomize=False, fromfile=None,
- findleaks=False, use_resources=None, trace=False, coverdir='coverage',
- runleaks=False, huntrleaks=False, verbose2=False, print_slow=False,
- random_seed=None, use_mp=None, verbose3=False, forever=False,
- header=False):
- """Execute a test suite.
-
- This also parses command-line options and modifies its behavior
- accordingly.
-
- tests -- a list of strings containing test names (optional)
- testdir -- the directory in which to look for tests (optional)
-
- Users other than the Python test suite will certainly want to
- specify testdir; if it's omitted, the directory containing the
- Python test suite is searched for.
-
- If the tests argument is omitted, the tests listed on the
- command-line will be used. If that's empty, too, then all *.py
- files beginning with test_ will be used.
-
- The other default arguments (verbose, quiet, exclude,
- single, randomize, findleaks, use_resources, trace, coverdir,
- print_slow, and random_seed) allow programmers calling main()
- directly to set the values that would normally be set by flags
- on the command line.
- """
-
- test_support.record_original_stdout(sys.stdout)
- try:
- opts, args = getopt.getopt(sys.argv[1:], 'hvqxsSrf:lu:t:TD:NLR:FwWM:j:',
- ['help', 'verbose', 'verbose2', 'verbose3', 'quiet',
- 'exclude', 'single', 'slow', 'randomize', 'fromfile=', 'findleaks',
- 'use=', 'threshold=', 'trace', 'coverdir=', 'nocoverdir',
- 'runleaks', 'huntrleaks=', 'memlimit=', 'randseed=',
- 'multiprocess=', 'slaveargs=', 'forever', 'header'])
- except getopt.error as msg:
- usage(2, msg)
-
- # Defaults
- if random_seed is None:
- random_seed = random.randrange(10000000)
- if use_resources is None:
- use_resources = []
- for o, a in opts:
- if o in ('-h', '--help'):
- usage(0)
- elif o in ('-v', '--verbose'):
- verbose += 1
- elif o in ('-w', '--verbose2'):
- verbose2 = True
- elif o in ('-W', '--verbose3'):
- verbose3 = True
- elif o in ('-q', '--quiet'):
- quiet = True;
- verbose = 0
- elif o in ('-x', '--exclude'):
- exclude = True
- elif o in ('-s', '--single'):
- single = True
- elif o in ('-S', '--slow'):
- print_slow = True
- elif o in ('-r', '--randomize'):
- randomize = True
- elif o == '--randseed':
- random_seed = int(a)
- elif o in ('-f', '--fromfile'):
- fromfile = a
- elif o in ('-l', '--findleaks'):
- findleaks = True
- elif o in ('-L', '--runleaks'):
- runleaks = True
- elif o in ('-t', '--threshold'):
- import gc
- gc.set_threshold(int(a))
- elif o in ('-T', '--coverage'):
- trace = True
- elif o in ('-D', '--coverdir'):
- coverdir = os.path.join(os.getcwd(), a)
- elif o in ('-N', '--nocoverdir'):
- coverdir = None
- elif o in ('-R', '--huntrleaks'):
- huntrleaks = a.split(':')
- if len(huntrleaks) not in (2, 3):
- print(a, huntrleaks)
- usage(2, '-R takes 2 or 3 colon-separated arguments')
- if not huntrleaks[0]:
- huntrleaks[0] = 5
- else:
- huntrleaks[0] = int(huntrleaks[0])
- if not huntrleaks[1]:
- huntrleaks[1] = 4
- else:
- huntrleaks[1] = int(huntrleaks[1])
- if len(huntrleaks) == 2 or not huntrleaks[2]:
- huntrleaks[2:] = ["reflog.txt"]
- elif o in ('-M', '--memlimit'):
- test_support.set_memlimit(a)
- elif o in ('-u', '--use'):
- u = [x.lower() for x in a.split(',')]
- for r in u:
- if r == 'all':
- use_resources[:] = RESOURCE_NAMES
- continue
- remove = False
- if r[0] == '-':
- remove = True
- r = r[1:]
- if r not in RESOURCE_NAMES:
- usage(1, 'Invalid -u/--use option: ' + a)
- if remove:
- if r in use_resources:
- use_resources.remove(r)
- elif r not in use_resources:
- use_resources.append(r)
- elif o in ('-F', '--forever'):
- forever = True
- elif o in ('-j', '--multiprocess'):
- use_mp = int(a)
- elif o == '--header':
- header = True
- elif o == '--slaveargs':
- args, kwargs = json.loads(a)
- try:
- result = runtest(*args, **kwargs)
- except BaseException as e:
- result = INTERRUPTED, e.__class__.__name__
- print() # Force a newline (just in case)
- print(json.dumps(result))
- sys.exit(0)
- else:
- print(("No handler for option {0}. Please "
- "report this as a bug at http://bugs.python.org.").format(o), file=sys.stderr)
- sys.exit(1)
- if single and fromfile:
- usage(2, "-s and -f don't go together!")
- if use_mp and trace:
- usage(2, "-T and -j don't go together!")
- if use_mp and findleaks:
- usage(2, "-l and -j don't go together!")
-
- good = []
- bad = []
- skipped = []
- resource_denieds = []
- environment_changed = []
- interrupted = False
-
- if findleaks:
- try:
- import gc
- except ImportError:
- print('No GC available, disabling findleaks.')
- findleaks = False
- else:
- # Uncomment the line below to report garbage that is not
- # freeable by reference counting alone. By default only
- # garbage that is not collectable by the GC is reported.
- #gc.set_debug(gc.DEBUG_SAVEALL)
- found_garbage = []
-
- if single:
- filename = os.path.join(TEMPDIR, 'pynexttest')
- try:
- fp = open(filename, 'r')
- next_test = fp.read().strip()
- tests = [next_test]
- fp.close()
- except IOError:
- pass
-
- if fromfile:
- tests = []
- fp = open(os.path.join(test_support.SAVEDCWD, fromfile))
- for line in fp:
- guts = line.split() # assuming no test has whitespace in its name
- if guts and not guts[0].startswith('#'):
- tests.extend(guts)
- fp.close()
-
- # Strip .py extensions.
- removepy(args)
- removepy(tests)
-
- stdtests = STDTESTS[:]
- nottests = NOTTESTS.copy()
- if exclude:
- for arg in args:
- if arg in stdtests:
- stdtests.remove(arg)
- nottests.add(arg)
- args = []
-
- # For a partial run, we do not need to clutter the output.
- if verbose or header or not (quiet or single or tests or args):
- # Print basic platform information
- print("==", platform.python_implementation(), \
- " ".join(sys.version.split()))
- print("== ", platform.platform(aliased=True), \
- "%s-endian" % sys.byteorder)
- print("== ", os.getcwd())
- print("Testing with flags:", sys.flags)
-
- alltests = findtests(testdir, stdtests, nottests)
- selected = tests or args or alltests
- if single:
- selected = selected[:1]
- try:
- next_single_test = alltests[alltests.index(selected[0])+1]
- except IndexError:
- next_single_test = None
- if randomize:
- random.seed(random_seed)
- print("Using random seed", random_seed)
- random.shuffle(selected)
- if trace:
- import trace
- tracer = trace.Trace(ignoredirs=[sys.prefix, sys.exec_prefix],
- trace=False, count=True)
-
- test_times = []
- test_support.use_resources = use_resources
- save_modules = sys.modules.keys()
-
- def accumulate_result(test, result):
- ok, test_time = result
- test_times.append((test_time, test))
- if ok == PASSED:
- good.append(test)
- elif ok == FAILED:
- bad.append(test)
- elif ok == ENV_CHANGED:
- bad.append(test)
- environment_changed.append(test)
- elif ok == SKIPPED:
- skipped.append(test)
- elif ok == RESOURCE_DENIED:
- skipped.append(test)
- resource_denieds.append(test)
-
- if forever:
- def test_forever(tests=list(selected)):
- while True:
- for test in tests:
- yield test
- if bad:
- return
- tests = test_forever()
- else:
- tests = iter(selected)
-
- if use_mp:
- try:
- from threading import Thread
- except ImportError:
- print("Multiprocess option requires thread support")
- sys.exit(2)
- from Queue import Queue
- from subprocess import Popen, PIPE
- debug_output_pat = re.compile(r"\[\d+ refs\]$")
- output = Queue()
- def tests_and_args():
- for test in tests:
- args_tuple = (
- (test, verbose, quiet),
- dict(huntrleaks=huntrleaks, use_resources=use_resources)
- )
- yield (test, args_tuple)
- pending = tests_and_args()
- opt_args = test_support.args_from_interpreter_flags()
- base_cmd = [sys.executable] + opt_args + ['-m', 'test.regrtest']
- def work():
- # A worker thread.
- try:
- while True:
- try:
- test, args_tuple = next(pending)
- except StopIteration:
- output.put((None, None, None, None))
- return
- # -E is needed by some tests, e.g. test_import
- popen = Popen(base_cmd + ['--slaveargs', json.dumps(args_tuple)],
- stdout=PIPE, stderr=PIPE,
- universal_newlines=True,
- close_fds=(os.name != 'nt'))
- stdout, stderr = popen.communicate()
- # Strip last refcount output line if it exists, since it
- # comes from the shutdown of the interpreter in the subcommand.
- stderr = debug_output_pat.sub("", stderr)
- stdout, _, result = stdout.strip().rpartition("\n")
- if not result:
- output.put((None, None, None, None))
- return
- result = json.loads(result)
- if not quiet:
- stdout = test+'\n'+stdout
- output.put((test, stdout.rstrip(), stderr.rstrip(), result))
- except BaseException:
- output.put((None, None, None, None))
- raise
- workers = [Thread(target=work) for i in range(use_mp)]
- for worker in workers:
- worker.start()
- finished = 0
- try:
- while finished < use_mp:
- test, stdout, stderr, result = output.get()
- if test is None:
- finished += 1
- continue
- if stdout:
- print(stdout)
- if stderr:
- print(stderr, file=sys.stderr)
- sys.stdout.flush()
- sys.stderr.flush()
- if result[0] == INTERRUPTED:
- assert result[1] == 'KeyboardInterrupt'
- raise KeyboardInterrupt # What else?
- accumulate_result(test, result)
- except KeyboardInterrupt:
- interrupted = True
- pending.close()
- for worker in workers:
- worker.join()
- else:
- for test in tests:
- if not quiet:
- print(test)
- sys.stdout.flush()
- if trace:
- # If we're tracing code coverage, then we don't exit with status
- # if on a false return value from main.
- tracer.runctx('runtest(test, verbose, quiet)',
- globals=globals(), locals=vars())
- else:
- try:
- result = runtest(test, verbose, quiet, huntrleaks)
- accumulate_result(test, result)
- if verbose3 and result[0] == FAILED:
- print("Re-running test %r in verbose mode" % test)
- runtest(test, True, quiet, huntrleaks)
- except KeyboardInterrupt:
- interrupted = True
- break
- except:
- raise
- if findleaks:
- gc.collect()
- if gc.garbage:
- print("Warning: test created", len(gc.garbage), end=' ')
- print("uncollectable object(s).")
- # move the uncollectable objects somewhere so we don't see
- # them again
- found_garbage.extend(gc.garbage)
- del gc.garbage[:]
- # Unload the newly imported modules (best effort finalization)
- for module in sys.modules.keys():
- if module not in save_modules and module.startswith("test."):
- test_support.unload(module)
-
- if interrupted:
- # print a newline after ^C
- print()
- print("Test suite interrupted by signal SIGINT.")
- omitted = set(selected) - set(good) - set(bad) - set(skipped)
- print(count(len(omitted), "test"), "omitted:")
- printlist(omitted)
- if good and not quiet:
- if not bad and not skipped and not interrupted and len(good) > 1:
- print("All", end=' ')
- print(count(len(good), "test"), "OK.")
- if print_slow:
- test_times.sort(reverse=True)
- print("10 slowest tests:")
- for time, test in test_times[:10]:
- print("%s: %.1fs" % (test, time))
- if bad:
- bad = set(bad) - set(environment_changed)
- if bad:
- print(count(len(bad), "test"), "failed:")
- printlist(bad)
- if environment_changed:
- print("{0} altered the execution environment:".format(
- count(len(environment_changed), "test")))
- printlist(environment_changed)
- if skipped and not quiet:
- print(count(len(skipped), "test"), "skipped:")
- printlist(skipped)
-
- e = _ExpectedSkips()
- plat = sys.platform
- if e.isvalid():
- surprise = set(skipped) - e.getexpected() - set(resource_denieds)
- if surprise:
- print(count(len(surprise), "skip"), \
- "unexpected on", plat + ":")
- printlist(surprise)
- else:
- print("Those skips are all expected on", plat + ".")
- else:
- print("Ask someone to teach regrtest.py about which tests are")
- print("expected to get skipped on", plat + ".")
-
- if verbose2 and bad:
- print("Re-running failed tests in verbose mode")
- for test in bad:
- print("Re-running test %r in verbose mode" % test)
- sys.stdout.flush()
- try:
- test_support.verbose = True
- ok = runtest(test, True, quiet, huntrleaks)
- except KeyboardInterrupt:
- # print a newline separate from the ^C
- print()
- break
- except:
- raise
-
- if single:
- if next_single_test:
- with open(filename, 'w') as fp:
- fp.write(next_single_test + '\n')
- else:
- os.unlink(filename)
-
- if trace:
- r = tracer.results()
- r.write_results(show_missing=True, summary=True, coverdir=coverdir)
-
- if runleaks:
- os.system("leaks %d" % os.getpid())
-
- sys.exit(len(bad) > 0 or interrupted)
-
-
-STDTESTS = [
- 'test_grammar',
- 'test_opcodes',
- 'test_dict',
- 'test_builtin',
- 'test_exceptions',
- 'test_types',
- 'test_unittest',
- 'test_doctest',
- 'test_doctest2',
-]
-
-NOTTESTS = set([
- 'test_support',
- 'test_future1',
- 'test_future2',
-])
-
-def findtests(testdir=None, stdtests=STDTESTS, nottests=NOTTESTS):
- """Return a list of all applicable test modules."""
- testdir = findtestdir(testdir)
- names = os.listdir(testdir)
- tests = []
- others = set(stdtests) | nottests
- for name in names:
- modname, ext = os.path.splitext(name)
- if modname[:5] == "test_" and ext == ".py" and modname not in others:
- tests.append(modname)
- return stdtests + sorted(tests)
-
-def runtest(test, verbose, quiet,
- huntrleaks=False, use_resources=None):
- """Run a single test.
-
- test -- the name of the test
- verbose -- if true, print more messages
- quiet -- if true, don't print 'skipped' messages (probably redundant)
- test_times -- a list of (time, test_name) pairs
- huntrleaks -- run multiple times to test for leaks; requires a debug
- build; a triple corresponding to -R's three arguments
- Returns one of the test result constants:
- INTERRUPTED KeyboardInterrupt when run under -j
- RESOURCE_DENIED test skipped because resource denied
- SKIPPED test skipped for some other reason
- ENV_CHANGED test failed because it changed the execution environment
- FAILED test failed
- PASSED test passed
- """
-
- test_support.verbose = verbose # Tell tests to be moderately quiet
- if use_resources is not None:
- test_support.use_resources = use_resources
- try:
- return runtest_inner(test, verbose, quiet, huntrleaks)
- finally:
- cleanup_test_droppings(test, verbose)
-
-
-# Unit tests are supposed to leave the execution environment unchanged
-# once they complete. But sometimes tests have bugs, especially when
-# tests fail, and the changes to environment go on to mess up other
-# tests. This can cause issues with buildbot stability, since tests
-# are run in random order and so problems may appear to come and go.
-# There are a few things we can save and restore to mitigate this, and
-# the following context manager handles this task.
-
-class saved_test_environment(object):
- """Save bits of the test environment and restore them at block exit.
-
- with saved_test_environment(testname, verbose, quiet):
- #stuff
-
- Unless quiet is True, a warning is printed to stderr if any of
- the saved items was changed by the test. The attribute 'changed'
- is initially False, but is set to True if a change is detected.
-
- If verbose is more than 1, the before and after state of changed
- items is also printed.
- """
-
- changed = False
-
- def __init__(self, testname, verbose=0, quiet=False):
- self.testname = testname
- self.verbose = verbose
- self.quiet = quiet
-
- # To add things to save and restore, add a name XXX to the resources list
- # and add corresponding get_XXX/restore_XXX functions. get_XXX should
- # return the value to be saved and compared against a second call to the
- # get function when test execution completes. restore_XXX should accept
- # the saved value and restore the resource using it. It will be called if
- # and only if a change in the value is detected.
- #
- # Note: XXX will have any '.' replaced with '_' characters when determining
- # the corresponding method names.
-
- resources = ('sys.argv', 'cwd', 'sys.stdin', 'sys.stdout', 'sys.stderr',
- 'os.environ', 'sys.path', 'asyncore.socket_map',
- 'test_support.TESTFN',
- )
-
- def get_sys_argv(self):
- return id(sys.argv), sys.argv, sys.argv[:]
- def restore_sys_argv(self, saved_argv):
- sys.argv = saved_argv[1]
- sys.argv[:] = saved_argv[2]
-
- def get_cwd(self):
- return os.getcwd()
- def restore_cwd(self, saved_cwd):
- os.chdir(saved_cwd)
-
- def get_sys_stdout(self):
- return sys.stdout
- def restore_sys_stdout(self, saved_stdout):
- sys.stdout = saved_stdout
-
- def get_sys_stderr(self):
- return sys.stderr
- def restore_sys_stderr(self, saved_stderr):
- sys.stderr = saved_stderr
-
- def get_sys_stdin(self):
- return sys.stdin
- def restore_sys_stdin(self, saved_stdin):
- sys.stdin = saved_stdin
-
- def get_os_environ(self):
- return id(os.environ), os.environ, dict(os.environ)
- def restore_os_environ(self, saved_environ):
- os.environ = saved_environ[1]
- os.environ.clear()
- os.environ.update(saved_environ[2])
-
- def get_sys_path(self):
- return id(sys.path), sys.path, sys.path[:]
- def restore_sys_path(self, saved_path):
- sys.path = saved_path[1]
- sys.path[:] = saved_path[2]
-
- def get_asyncore_socket_map(self):
- asyncore = sys.modules.get('asyncore')
- # XXX Making a copy keeps objects alive until __exit__ gets called.
- return asyncore and asyncore.socket_map.copy() or {}
- def restore_asyncore_socket_map(self, saved_map):
- asyncore = sys.modules.get('asyncore')
- if asyncore is not None:
- asyncore.close_all(ignore_all=True)
- asyncore.socket_map.update(saved_map)
-
- def get_test_support_TESTFN(self):
- if os.path.isfile(test_support.TESTFN):
- result = 'f'
- elif os.path.isdir(test_support.TESTFN):
- result = 'd'
- else:
- result = None
- return result
- def restore_test_support_TESTFN(self, saved_value):
- if saved_value is None:
- if os.path.isfile(test_support.TESTFN):
- os.unlink(test_support.TESTFN)
- elif os.path.isdir(test_support.TESTFN):
- shutil.rmtree(test_support.TESTFN)
-
- def resource_info(self):
- for name in self.resources:
- method_suffix = name.replace('.', '_')
- get_name = 'get_' + method_suffix
- restore_name = 'restore_' + method_suffix
- yield name, getattr(self, get_name), getattr(self, restore_name)
-
- def __enter__(self):
- self.saved_values = dict((name, get()) for name, get, restore
- in self.resource_info())
- return self
-
- def __exit__(self, exc_type, exc_val, exc_tb):
- saved_values = self.saved_values
- del self.saved_values
- for name, get, restore in self.resource_info():
- current = get()
- original = saved_values.pop(name)
- # Check for changes to the resource's value
- if current != original:
- self.changed = True
- restore(original)
- if not self.quiet:
- print((
- "Warning -- {0} was modified by {1}".format(
- name, self.testname)), file=sys.stderr)
- if self.verbose > 1:
- print((
- " Before: {0}\n After: {1} ".format(
- original, current)), file=sys.stderr)
- # XXX (ncoghlan): for most resources (e.g. sys.path) identity
- # matters at least as much as value. For others (e.g. cwd),
- # identity is irrelevant. Should we add a mechanism to check
- # for substitution in the cases where it matters?
- return False
-
-
-def runtest_inner(test, verbose, quiet, huntrleaks=False):
- test_support.unload(test)
- if verbose:
- capture_stdout = None
- else:
- capture_stdout = StringIO.StringIO()
-
- test_time = 0.0
- refleak = False # True if the test leaked references.
- try:
- save_stdout = sys.stdout
- try:
- if capture_stdout:
- sys.stdout = capture_stdout
- if test.startswith('test.'):
- abstest = test
- else:
- # Always import it from the test package
- abstest = 'test.' + test
- with saved_test_environment(test, verbose, quiet) as environment:
- start_time = time.time()
- the_package = __import__(abstest, globals(), locals(), [])
- the_module = getattr(the_package, test)
- # Old tests run to completion simply as a side-effect of
- # being imported. For tests based on unittest or doctest,
- # explicitly invoke their test_main() function (if it exists).
- indirect_test = getattr(the_module, "test_main", None)
- if indirect_test is not None:
- indirect_test()
- if huntrleaks:
- refleak = dash_R(the_module, test, indirect_test,
- huntrleaks)
- test_time = time.time() - start_time
- finally:
- sys.stdout = save_stdout
- except test_support.ResourceDenied as msg:
- if not quiet:
- print(test, "skipped --", msg)
- sys.stdout.flush()
- return RESOURCE_DENIED, test_time
- except unittest.SkipTest as msg:
- if not quiet:
- print(test, "skipped --", msg)
- sys.stdout.flush()
- return SKIPPED, test_time
- except KeyboardInterrupt:
- raise
- except test_support.TestFailed as msg:
- print("test", test, "failed --", msg, file=sys.stderr)
- sys.stderr.flush()
- return FAILED, test_time
- except:
- type, value = sys.exc_info()[:2]
- print("test", test, "crashed --", str(type) + ":", value, file=sys.stderr)
- sys.stderr.flush()
- if verbose:
- traceback.print_exc(file=sys.stderr)
- sys.stderr.flush()
- return FAILED, test_time
- else:
- if refleak:
- return FAILED, test_time
- if environment.changed:
- return ENV_CHANGED, test_time
- # Except in verbose mode, tests should not print anything
- if verbose or huntrleaks:
- return PASSED, test_time
- output = capture_stdout.getvalue()
- if not output:
- return PASSED, test_time
- print("test", test, "produced unexpected output:")
- print("*" * 70)
- print(output)
- print("*" * 70)
- sys.stdout.flush()
- return FAILED, test_time
-
-def cleanup_test_droppings(testname, verbose):
- import stat
- import gc
-
- # First kill any dangling references to open files etc.
- gc.collect()
-
- # Try to clean up junk commonly left behind. While tests shouldn't leave
- # any files or directories behind, when a test fails that can be tedious
- # for it to arrange. The consequences can be especially nasty on Windows,
- # since if a test leaves a file open, it cannot be deleted by name (while
- # there's nothing we can do about that here either, we can display the
- # name of the offending test, which is a real help).
- for name in (test_support.TESTFN,
- "db_home",
- ):
- if not os.path.exists(name):
- continue
-
- if os.path.isdir(name):
- kind, nuker = "directory", shutil.rmtree
- elif os.path.isfile(name):
- kind, nuker = "file", os.unlink
- else:
- raise SystemError("os.path says %r exists but is neither "
- "directory nor file" % name)
-
- if verbose:
- print("%r left behind %s %r" % (testname, kind, name))
- try:
- # if we have chmod, fix possible permissions problems
- # that might prevent cleanup
- if (hasattr(os, 'chmod')):
- os.chmod(name, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
- nuker(name)
- except Exception as msg:
- print(("%r left behind %s %r and it couldn't be "
- "removed: %s" % (testname, kind, name, msg)), file=sys.stderr)
-
-def dash_R(the_module, test, indirect_test, huntrleaks):
- """Run a test multiple times, looking for reference leaks.
-
- Returns:
- False if the test didn't leak references; True if we detected refleaks.
- """
- # This code is hackish and inelegant, but it seems to do the job.
- import copy_reg, _abcoll, _pyio
-
- if not hasattr(sys, 'gettotalrefcount'):
- raise Exception("Tracking reference leaks requires a debug build "
- "of Python")
-
- # Save current values for dash_R_cleanup() to restore.
- fs = warnings.filters[:]
- ps = copy_reg.dispatch_table.copy()
- pic = sys.path_importer_cache.copy()
- try:
- import zipimport
- except ImportError:
- zdc = None # Run unmodified on platforms without zipimport support
- else:
- zdc = zipimport._zip_directory_cache.copy()
- abcs = {}
- modules = _abcoll, _pyio
- for abc in [getattr(mod, a) for mod in modules for a in mod.__all__]:
- # XXX isinstance(abc, ABCMeta) leads to infinite recursion
- if not hasattr(abc, '_abc_registry'):
- continue
- for obj in abc.__subclasses__() + [abc]:
- abcs[obj] = obj._abc_registry.copy()
-
- if indirect_test:
- def run_the_test():
- indirect_test()
- else:
- def run_the_test():
- imp.reload(the_module)
-
- deltas = []
- nwarmup, ntracked, fname = huntrleaks
- fname = os.path.join(test_support.SAVEDCWD, fname)
- repcount = nwarmup + ntracked
- print("beginning", repcount, "repetitions", file=sys.stderr)
- print(("1234567890"*(repcount//10 + 1))[:repcount], file=sys.stderr)
- dash_R_cleanup(fs, ps, pic, zdc, abcs)
- for i in range(repcount):
- rc_before = sys.gettotalrefcount()
- run_the_test()
- sys.stderr.write('.')
- dash_R_cleanup(fs, ps, pic, zdc, abcs)
- rc_after = sys.gettotalrefcount()
- if i >= nwarmup:
- deltas.append(rc_after - rc_before)
- print(file=sys.stderr)
- if any(deltas):
- msg = '%s leaked %s references, sum=%s' % (test, deltas, sum(deltas))
- print(msg, file=sys.stderr)
- with open(fname, "a") as refrep:
- print(msg, file=refrep)
- refrep.flush()
- return True
- return False
-
-def dash_R_cleanup(fs, ps, pic, zdc, abcs):
- import gc, copy_reg
- import _strptime, linecache
- dircache = test_support.import_module('dircache', deprecated=True)
- import urlparse, urllib, urllib2, mimetypes, doctest
- import struct, filecmp
- from distutils.dir_util import _path_created
-
- # Clear the warnings registry, so they can be displayed again
- for mod in sys.modules.values():
- if hasattr(mod, '__warningregistry__'):
- del mod.__warningregistry__
-
- # Restore some original values.
- warnings.filters[:] = fs
- copy_reg.dispatch_table.clear()
- copy_reg.dispatch_table.update(ps)
- sys.path_importer_cache.clear()
- sys.path_importer_cache.update(pic)
- try:
- import zipimport
- except ImportError:
- pass # Run unmodified on platforms without zipimport support
- else:
- zipimport._zip_directory_cache.clear()
- zipimport._zip_directory_cache.update(zdc)
-
- # clear type cache
- sys._clear_type_cache()
-
- # Clear ABC registries, restoring previously saved ABC registries.
- for abc, registry in abcs.items():
- abc._abc_registry = registry.copy()
- abc._abc_cache.clear()
- abc._abc_negative_cache.clear()
-
- # Clear assorted module caches.
- _path_created.clear()
- re.purge()
- _strptime._regex_cache.clear()
- urlparse.clear_cache()
- urllib.urlcleanup()
- urllib2.install_opener(None)
- dircache.reset()
- linecache.clearcache()
- mimetypes._default_mime_types()
- filecmp._cache.clear()
- struct._clearcache()
- doctest.master = None
- try:
- import ctypes
- except ImportError:
- # Don't worry about resetting the cache if ctypes is not supported
- pass
- else:
- ctypes._reset_cache()
-
- # Collect cyclic trash.
- gc.collect()
-
-def findtestdir(path=None):
- return path or os.path.dirname(__file__) or os.curdir
-
-def removepy(names):
- if not names:
- return
- for idx, name in enumerate(names):
- basename, ext = os.path.splitext(name)
- if ext == '.py':
- names[idx] = basename
-
-def count(n, word):
- if n == 1:
- return "%d %s" % (n, word)
- else:
- return "%d %ss" % (n, word)
-
-def printlist(x, width=70, indent=4):
- """Print the elements of iterable x to stdout.
-
- Optional arg width (default 70) is the maximum line length.
- Optional arg indent (default 4) is the number of blanks with which to
- begin each line.
- """
-
- from textwrap import fill
- blanks = ' ' * indent
- # Print the sorted list: 'x' may be a '--random' list or a set()
- print(fill(' '.join(str(elt) for elt in sorted(x)), width,
- initial_indent=blanks, subsequent_indent=blanks))
-
-# Map sys.platform to a string containing the basenames of tests
-# expected to be skipped on that platform.
-#
-# Special cases:
-# test_pep277
-# The _ExpectedSkips constructor adds this to the set of expected
-# skips if not os.path.supports_unicode_filenames.
-# test_timeout
-# Controlled by test_timeout.skip_expected. Requires the network
-# resource and a socket module.
-#
-# Tests that are expected to be skipped everywhere except on one platform
-# are also handled separately.
-
-_expectations = {
- 'win32':
- """
- test__locale
- test_bsddb185
- test_bsddb3
- test_commands
- test_crypt
- test_curses
- test_dbm
- test_dl
- test_fcntl
- test_fork1
- test_epoll
- test_gdbm
- test_grp
- test_ioctl
- test_largefile
- test_kqueue
- test_mhlib
- test_openpty
- test_ossaudiodev
- test_pipes
- test_poll
- test_posix
- test_pty
- test_pwd
- test_resource
- test_signal
- test_threadsignals
- test_timing
- test_wait3
- test_wait4
- """,
- 'linux2':
- """
- test_bsddb185
- test_curses
- test_dl
- test_largefile
- test_kqueue
- test_ossaudiodev
- """,
- 'unixware7':
- """
- test_bsddb
- test_bsddb185
- test_dl
- test_epoll
- test_largefile
- test_kqueue
- test_minidom
- test_openpty
- test_pyexpat
- test_sax
- test_sundry
- """,
- 'openunix8':
- """
- test_bsddb
- test_bsddb185
- test_dl
- test_epoll
- test_largefile
- test_kqueue
- test_minidom
- test_openpty
- test_pyexpat
- test_sax
- test_sundry
- """,
- 'sco_sv3':
- """
- test_asynchat
- test_bsddb
- test_bsddb185
- test_dl
- test_fork1
- test_epoll
- test_gettext
- test_largefile
- test_locale
- test_kqueue
- test_minidom
- test_openpty
- test_pyexpat
- test_queue
- test_sax
- test_sundry
- test_thread
- test_threaded_import
- test_threadedtempfile
- test_threading
- """,
- 'riscos':
- """
- test_asynchat
- test_atexit
- test_bsddb
- test_bsddb185
- test_bsddb3
- test_commands
- test_crypt
- test_dbm
- test_dl
- test_fcntl
- test_fork1
- test_epoll
- test_gdbm
- test_grp
- test_largefile
- test_locale
- test_kqueue
- test_mmap
- test_openpty
- test_poll
- test_popen2
- test_pty
- test_pwd
- test_strop
- test_sundry
- test_thread
- test_threaded_import
- test_threadedtempfile
- test_threading
- test_timing
- """,
- 'darwin':
- """
- test__locale
- test_bsddb
- test_bsddb3
- test_curses
- test_epoll
- test_gdb
- test_gdbm
- test_largefile
- test_locale
- test_kqueue
- test_minidom
- test_ossaudiodev
- test_poll
- """,
- 'sunos5':
- """
- test_bsddb
- test_bsddb185
- test_curses
- test_dbm
- test_epoll
- test_kqueue
- test_gdbm
- test_gzip
- test_openpty
- test_zipfile
- test_zlib
- """,
- 'hp-ux11':
- """
- test_bsddb
- test_bsddb185
- test_curses
- test_dl
- test_epoll
- test_gdbm
- test_gzip
- test_largefile
- test_locale
- test_kqueue
- test_minidom
- test_openpty
- test_pyexpat
- test_sax
- test_zipfile
- test_zlib
- """,
- 'atheos':
- """
- test_bsddb185
- test_curses
- test_dl
- test_gdbm
- test_epoll
- test_largefile
- test_locale
- test_kqueue
- test_mhlib
- test_mmap
- test_poll
- test_popen2
- test_resource
- """,
- 'cygwin':
- """
- test_bsddb185
- test_bsddb3
- test_curses
- test_dbm
- test_epoll
- test_ioctl
- test_kqueue
- test_largefile
- test_locale
- test_ossaudiodev
- test_socketserver
- """,
- 'os2emx':
- """
- test_audioop
- test_bsddb185
- test_bsddb3
- test_commands
- test_curses
- test_dl
- test_epoll
- test_kqueue
- test_largefile
- test_mhlib
- test_mmap
- test_openpty
- test_ossaudiodev
- test_pty
- test_resource
- test_signal
- """,
- 'freebsd4':
- """
- test_bsddb
- test_bsddb3
- test_epoll
- test_gdbm
- test_locale
- test_ossaudiodev
- test_pep277
- test_pty
- test_socketserver
- test_tcl
- test_tk
- test_ttk_guionly
- test_ttk_textonly
- test_timeout
- test_urllibnet
- test_multiprocessing
- """,
- 'aix5':
- """
- test_bsddb
- test_bsddb185
- test_bsddb3
- test_bz2
- test_dl
- test_epoll
- test_gdbm
- test_gzip
- test_kqueue
- test_ossaudiodev
- test_tcl
- test_tk
- test_ttk_guionly
- test_ttk_textonly
- test_zipimport
- test_zlib
- """,
- 'openbsd3':
- """
- test_ascii_formatd
- test_bsddb
- test_bsddb3
- test_ctypes
- test_dl
- test_epoll
- test_gdbm
- test_locale
- test_normalization
- test_ossaudiodev
- test_pep277
- test_tcl
- test_tk
- test_ttk_guionly
- test_ttk_textonly
- test_multiprocessing
- """,
- 'netbsd3':
- """
- test_ascii_formatd
- test_bsddb
- test_bsddb185
- test_bsddb3
- test_ctypes
- test_curses
- test_dl
- test_epoll
- test_gdbm
- test_locale
- test_ossaudiodev
- test_pep277
- test_tcl
- test_tk
- test_ttk_guionly
- test_ttk_textonly
- test_multiprocessing
- """,
-}
-_expectations['freebsd5'] = _expectations['freebsd4']
-_expectations['freebsd6'] = _expectations['freebsd4']
-_expectations['freebsd7'] = _expectations['freebsd4']
-_expectations['freebsd8'] = _expectations['freebsd4']
-
-class _ExpectedSkips(object):
- def __init__(self):
- import os.path
- from test import test_timeout
-
- self.valid = False
- if sys.platform in _expectations:
- s = _expectations[sys.platform]
- self.expected = set(s.split())
-
- # expected to be skipped on every platform, even Linux
- self.expected.add('test_linuxaudiodev')
-
- if not os.path.supports_unicode_filenames:
- self.expected.add('test_pep277')
-
- if test_timeout.skip_expected:
- self.expected.add('test_timeout')
-
- if sys.maxint == 9223372036854775807:
- self.expected.add('test_imageop')
-
- if sys.platform != "darwin":
- MAC_ONLY = ["test_macos", "test_macostools", "test_aepack",
- "test_plistlib", "test_scriptpackages",
- "test_applesingle"]
- for skip in MAC_ONLY:
- self.expected.add(skip)
- elif len(u'\0'.encode('unicode-internal')) == 4:
- self.expected.add("test_macostools")
-
-
- if sys.platform != "win32":
- # test_sqlite is only reliable on Windows where the library
- # is distributed with Python
- WIN_ONLY = ["test_unicode_file", "test_winreg",
- "test_winsound", "test_startfile",
- "test_sqlite", "test_msilib"]
- for skip in WIN_ONLY:
- self.expected.add(skip)
-
- if sys.platform != 'irix':
- IRIX_ONLY = ["test_imageop", "test_al", "test_cd", "test_cl",
- "test_gl", "test_imgfile"]
- for skip in IRIX_ONLY:
- self.expected.add(skip)
-
- if sys.platform != 'sunos5':
- self.expected.add('test_sunaudiodev')
- self.expected.add('test_nis')
-
- if not sys.py3kwarning:
- self.expected.add('test_py3kwarn')
-
- self.valid = True
-
- def isvalid(self):
- "Return true iff _ExpectedSkips knows about the current platform."
- return self.valid
-
- def getexpected(self):
- """Return set of test names we expect to skip on current platform.
-
- self.isvalid() must be true.
- """
-
- assert self.isvalid()
- return self.expected
-
-if __name__ == '__main__':
- # findtestdir() gets the dirname out of __file__, so we have to make it
- # absolute before changing the working directory.
- # For example __file__ may be relative when running trace or profile.
- # See issue #9323.
- __file__ = os.path.abspath(__file__)
-
- # sanity check
- assert __file__ == os.path.abspath(sys.argv[0])
-
- # When tests are run from the Python build directory, it is best practice
- # to keep the test files in a subfolder. It eases the cleanup of leftover
- # files using command "make distclean".
- if sysconfig.is_python_build():
- TEMPDIR = os.path.join(sysconfig.get_config_var('srcdir'), 'build')
- TEMPDIR = os.path.abspath(TEMPDIR)
- if not os.path.exists(TEMPDIR):
- os.mkdir(TEMPDIR)
-
- # Define a writable temp dir that will be used as cwd while running
- # the tests. The name of the dir includes the pid to allow parallel
- # testing (see the -j option).
- TESTCWD = 'test_python_{0}'.format(os.getpid())
-
- TESTCWD = os.path.join(TEMPDIR, TESTCWD)
-
- # Run the tests in a context manager that temporary changes the CWD to a
- # temporary and writable directory. If it's not possible to create or
- # change the CWD, the original CWD will be used. The original CWD is
- # available from test_support.SAVEDCWD.
- with test_support.temp_cwd(TESTCWD, quiet=True):
- main()
diff --git a/future/standard_library/test/string_tests.py b/future/standard_library/test/string_tests.py
deleted file mode 100644
index 96f3ea94..00000000
--- a/future/standard_library/test/string_tests.py
+++ /dev/null
@@ -1,1392 +0,0 @@
-"""
-Common tests shared by test_str, test_unicode, test_userstring and test_string.
-"""
-from __future__ import (absolute_import, division,
- print_function, unicode_literals)
-from future import standard_library
-from future.builtins import *
-
-import string
-import sys
-import struct
-from test import support
-from collections import UserList
-import _testcapi
-
-class Sequence(object):
- def __init__(self, seq='wxyz'): self.seq = seq
- def __len__(self): return len(self.seq)
- def __getitem__(self, i): return self.seq[i]
-
-class BadSeq1(Sequence):
- def __init__(self): self.seq = [7, 'hello', 123]
- def __str__(self): return '{0} {1} {2}'.format(*self.seq)
-
-class BadSeq2(Sequence):
- def __init__(self): self.seq = ['a', 'b', 'c']
- def __len__(self): return 8
-
-class BaseTest(object):
- # These tests are for buffers of values (bytes) and not
- # specific to character interpretation, used for bytes objects
- # and various string implementations
-
- # The type to be tested
- # Change in subclasses to change the behaviour of fixtesttype()
- type2test = None
-
- # Whether the "contained items" of the container are integers in
- # range(0, 256) (i.e. bytes, bytearray) or strings of length 1
- # (str)
- contains_bytes = False
-
- # All tests pass their arguments to the testing methods
- # as str objects. fixtesttype() can be used to propagate
- # these arguments to the appropriate type
- def fixtype(self, obj):
- if isinstance(obj, str):
- return self.__class__.type2test(obj)
- elif isinstance(obj, list):
- return [self.fixtype(x) for x in obj]
- elif isinstance(obj, tuple):
- return tuple([self.fixtype(x) for x in obj])
- elif isinstance(obj, dict):
- return dict([
- (self.fixtype(key), self.fixtype(value))
- for (key, value) in obj.items()
- ])
- else:
- return obj
-
- # check that obj.method(*args) returns result
- def checkequal(self, result, obj, methodname, *args, **kwargs):
- result = self.fixtype(result)
- obj = self.fixtype(obj)
- args = self.fixtype(args)
- kwargs = dict((k, self.fixtype(v)) for k,v in kwargs.items())
- realresult = getattr(obj, methodname)(*args, **kwargs)
- self.assertEqual(
- result,
- realresult
- )
- # if the original is returned make sure that
- # this doesn't happen with subclasses
- if obj is realresult:
- try:
- class subtype(self.__class__.type2test):
- pass
- except TypeError:
- pass # Skip this if we can't subclass
- else:
- obj = subtype(obj)
- realresult = getattr(obj, methodname)(*args)
- self.assertIsNot(obj, realresult)
-
- # check that obj.method(*args) raises exc
- def checkraises(self, exc, obj, methodname, *args):
- obj = self.fixtype(obj)
- args = self.fixtype(args)
- self.assertRaises(
- exc,
- getattr(obj, methodname),
- *args
- )
-
- # call obj.method(*args) without any checks
- def checkcall(self, obj, methodname, *args):
- obj = self.fixtype(obj)
- args = self.fixtype(args)
- getattr(obj, methodname)(*args)
-
- def test_count(self):
- self.checkequal(3, 'aaa', 'count', 'a')
- self.checkequal(0, 'aaa', 'count', 'b')
- self.checkequal(3, 'aaa', 'count', 'a')
- self.checkequal(0, 'aaa', 'count', 'b')
- self.checkequal(3, 'aaa', 'count', 'a')
- self.checkequal(0, 'aaa', 'count', 'b')
- self.checkequal(0, 'aaa', 'count', 'b')
- self.checkequal(2, 'aaa', 'count', 'a', 1)
- self.checkequal(0, 'aaa', 'count', 'a', 10)
- self.checkequal(1, 'aaa', 'count', 'a', -1)
- self.checkequal(3, 'aaa', 'count', 'a', -10)
- self.checkequal(1, 'aaa', 'count', 'a', 0, 1)
- self.checkequal(3, 'aaa', 'count', 'a', 0, 10)
- self.checkequal(2, 'aaa', 'count', 'a', 0, -1)
- self.checkequal(0, 'aaa', 'count', 'a', 0, -10)
- self.checkequal(3, 'aaa', 'count', '', 1)
- self.checkequal(1, 'aaa', 'count', '', 3)
- self.checkequal(0, 'aaa', 'count', '', 10)
- self.checkequal(2, 'aaa', 'count', '', -1)
- self.checkequal(4, 'aaa', 'count', '', -10)
-
- self.checkequal(1, '', 'count', '')
- self.checkequal(0, '', 'count', '', 1, 1)
- self.checkequal(0, '', 'count', '', sys.maxsize, 0)
-
- self.checkequal(0, '', 'count', 'xx')
- self.checkequal(0, '', 'count', 'xx', 1, 1)
- self.checkequal(0, '', 'count', 'xx', sys.maxsize, 0)
-
- self.checkraises(TypeError, 'hello', 'count')
-
- if self.contains_bytes:
- self.checkequal(0, 'hello', 'count', 42)
- else:
- self.checkraises(TypeError, 'hello', 'count', 42)
-
- # For a variety of combinations,
- # verify that str.count() matches an equivalent function
- # replacing all occurrences and then differencing the string lengths
- charset = ['', 'a', 'b']
- digits = 7
- base = len(charset)
- teststrings = set()
- for i in range(base ** digits):
- entry = []
- for j in range(digits):
- i, m = divmod(i, base)
- entry.append(charset[m])
- teststrings.add(''.join(entry))
- teststrings = [self.fixtype(ts) for ts in teststrings]
- for i in teststrings:
- n = len(i)
- for j in teststrings:
- r1 = i.count(j)
- if j:
- r2, rem = divmod(n - len(i.replace(j, self.fixtype(''))),
- len(j))
- else:
- r2, rem = len(i)+1, 0
- if rem or r1 != r2:
- self.assertEqual(rem, 0, '%s != 0 for %s' % (rem, i))
- self.assertEqual(r1, r2, '%s != %s for %s' % (r1, r2, i))
-
- def test_find(self):
- self.checkequal(0, 'abcdefghiabc', 'find', 'abc')
- self.checkequal(9, 'abcdefghiabc', 'find', 'abc', 1)
- self.checkequal(-1, 'abcdefghiabc', 'find', 'def', 4)
-
- self.checkequal(0, 'abc', 'find', '', 0)
- self.checkequal(3, 'abc', 'find', '', 3)
- self.checkequal(-1, 'abc', 'find', '', 4)
-
- # to check the ability to pass None as defaults
- self.checkequal( 2, 'rrarrrrrrrrra', 'find', 'a')
- self.checkequal(12, 'rrarrrrrrrrra', 'find', 'a', 4)
- self.checkequal(-1, 'rrarrrrrrrrra', 'find', 'a', 4, 6)
- self.checkequal(12, 'rrarrrrrrrrra', 'find', 'a', 4, None)
- self.checkequal( 2, 'rrarrrrrrrrra', 'find', 'a', None, 6)
-
- self.checkraises(TypeError, 'hello', 'find')
-
- if self.contains_bytes:
- self.checkequal(-1, 'hello', 'find', 42)
- else:
- self.checkraises(TypeError, 'hello', 'find', 42)
-
- self.checkequal(0, '', 'find', '')
- self.checkequal(-1, '', 'find', '', 1, 1)
- self.checkequal(-1, '', 'find', '', sys.maxsize, 0)
-
- self.checkequal(-1, '', 'find', 'xx')
- self.checkequal(-1, '', 'find', 'xx', 1, 1)
- self.checkequal(-1, '', 'find', 'xx', sys.maxsize, 0)
-
- # issue 7458
- self.checkequal(-1, 'ab', 'find', 'xxx', sys.maxsize + 1, 0)
-
- # For a variety of combinations,
- # verify that str.find() matches __contains__
- # and that the found substring is really at that location
- charset = ['', 'a', 'b', 'c']
- digits = 5
- base = len(charset)
- teststrings = set()
- for i in range(base ** digits):
- entry = []
- for j in range(digits):
- i, m = divmod(i, base)
- entry.append(charset[m])
- teststrings.add(''.join(entry))
- teststrings = [self.fixtype(ts) for ts in teststrings]
- for i in teststrings:
- for j in teststrings:
- loc = i.find(j)
- r1 = (loc != -1)
- r2 = j in i
- self.assertEqual(r1, r2)
- if loc != -1:
- self.assertEqual(i[loc:loc+len(j)], j)
-
- def test_rfind(self):
- self.checkequal(9, 'abcdefghiabc', 'rfind', 'abc')
- self.checkequal(12, 'abcdefghiabc', 'rfind', '')
- self.checkequal(0, 'abcdefghiabc', 'rfind', 'abcd')
- self.checkequal(-1, 'abcdefghiabc', 'rfind', 'abcz')
-
- self.checkequal(3, 'abc', 'rfind', '', 0)
- self.checkequal(3, 'abc', 'rfind', '', 3)
- self.checkequal(-1, 'abc', 'rfind', '', 4)
-
- # to check the ability to pass None as defaults
- self.checkequal(12, 'rrarrrrrrrrra', 'rfind', 'a')
- self.checkequal(12, 'rrarrrrrrrrra', 'rfind', 'a', 4)
- self.checkequal(-1, 'rrarrrrrrrrra', 'rfind', 'a', 4, 6)
- self.checkequal(12, 'rrarrrrrrrrra', 'rfind', 'a', 4, None)
- self.checkequal( 2, 'rrarrrrrrrrra', 'rfind', 'a', None, 6)
-
- self.checkraises(TypeError, 'hello', 'rfind')
-
- if self.contains_bytes:
- self.checkequal(-1, 'hello', 'rfind', 42)
- else:
- self.checkraises(TypeError, 'hello', 'rfind', 42)
-
- # For a variety of combinations,
- # verify that str.rfind() matches __contains__
- # and that the found substring is really at that location
- charset = ['', 'a', 'b', 'c']
- digits = 5
- base = len(charset)
- teststrings = set()
- for i in range(base ** digits):
- entry = []
- for j in range(digits):
- i, m = divmod(i, base)
- entry.append(charset[m])
- teststrings.add(''.join(entry))
- teststrings = [self.fixtype(ts) for ts in teststrings]
- for i in teststrings:
- for j in teststrings:
- loc = i.rfind(j)
- r1 = (loc != -1)
- r2 = j in i
- self.assertEqual(r1, r2)
- if loc != -1:
- self.assertEqual(i[loc:loc+len(j)], j)
-
- # issue 7458
- self.checkequal(-1, 'ab', 'rfind', 'xxx', sys.maxsize + 1, 0)
-
- # issue #15534
- self.checkequal(0, '<......\u043c...', "rfind", "<")
-
- def test_index(self):
- self.checkequal(0, 'abcdefghiabc', 'index', '')
- self.checkequal(3, 'abcdefghiabc', 'index', 'def')
- self.checkequal(0, 'abcdefghiabc', 'index', 'abc')
- self.checkequal(9, 'abcdefghiabc', 'index', 'abc', 1)
-
- self.checkraises(ValueError, 'abcdefghiabc', 'index', 'hib')
- self.checkraises(ValueError, 'abcdefghiab', 'index', 'abc', 1)
- self.checkraises(ValueError, 'abcdefghi', 'index', 'ghi', 8)
- self.checkraises(ValueError, 'abcdefghi', 'index', 'ghi', -1)
-
- # to check the ability to pass None as defaults
- self.checkequal( 2, 'rrarrrrrrrrra', 'index', 'a')
- self.checkequal(12, 'rrarrrrrrrrra', 'index', 'a', 4)
- self.checkraises(ValueError, 'rrarrrrrrrrra', 'index', 'a', 4, 6)
- self.checkequal(12, 'rrarrrrrrrrra', 'index', 'a', 4, None)
- self.checkequal( 2, 'rrarrrrrrrrra', 'index', 'a', None, 6)
-
- self.checkraises(TypeError, 'hello', 'index')
-
- if self.contains_bytes:
- self.checkraises(ValueError, 'hello', 'index', 42)
- else:
- self.checkraises(TypeError, 'hello', 'index', 42)
-
- def test_rindex(self):
- self.checkequal(12, 'abcdefghiabc', 'rindex', '')
- self.checkequal(3, 'abcdefghiabc', 'rindex', 'def')
- self.checkequal(9, 'abcdefghiabc', 'rindex', 'abc')
- self.checkequal(0, 'abcdefghiabc', 'rindex', 'abc', 0, -1)
-
- self.checkraises(ValueError, 'abcdefghiabc', 'rindex', 'hib')
- self.checkraises(ValueError, 'defghiabc', 'rindex', 'def', 1)
- self.checkraises(ValueError, 'defghiabc', 'rindex', 'abc', 0, -1)
- self.checkraises(ValueError, 'abcdefghi', 'rindex', 'ghi', 0, 8)
- self.checkraises(ValueError, 'abcdefghi', 'rindex', 'ghi', 0, -1)
-
- # to check the ability to pass None as defaults
- self.checkequal(12, 'rrarrrrrrrrra', 'rindex', 'a')
- self.checkequal(12, 'rrarrrrrrrrra', 'rindex', 'a', 4)
- self.checkraises(ValueError, 'rrarrrrrrrrra', 'rindex', 'a', 4, 6)
- self.checkequal(12, 'rrarrrrrrrrra', 'rindex', 'a', 4, None)
- self.checkequal( 2, 'rrarrrrrrrrra', 'rindex', 'a', None, 6)
-
- self.checkraises(TypeError, 'hello', 'rindex')
-
- if self.contains_bytes:
- self.checkraises(ValueError, 'hello', 'rindex', 42)
- else:
- self.checkraises(TypeError, 'hello', 'rindex', 42)
-
- def test_lower(self):
- self.checkequal('hello', 'HeLLo', 'lower')
- self.checkequal('hello', 'hello', 'lower')
- self.checkraises(TypeError, 'hello', 'lower', 42)
-
- def test_upper(self):
- self.checkequal('HELLO', 'HeLLo', 'upper')
- self.checkequal('HELLO', 'HELLO', 'upper')
- self.checkraises(TypeError, 'hello', 'upper', 42)
-
- def test_expandtabs(self):
- self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs')
- self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs', 8)
- self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs', 4)
- self.checkequal('abc\r\nab def\ng hi', 'abc\r\nab\tdef\ng\thi', 'expandtabs', 4)
- self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs')
- self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs', 8)
- self.checkequal('abc\r\nab\r\ndef\ng\r\nhi', 'abc\r\nab\r\ndef\ng\r\nhi', 'expandtabs', 4)
- self.checkequal(' a\n b', ' \ta\n\tb', 'expandtabs', 1)
-
- self.checkraises(TypeError, 'hello', 'expandtabs', 42, 42)
- # This test is only valid when sizeof(int) == sizeof(void*) == 4.
- if sys.maxsize < (1 << 32) and struct.calcsize('P') == 4:
- self.checkraises(OverflowError,
- '\ta\n\tb', 'expandtabs', sys.maxsize)
-
- def test_split(self):
- # by a char
- self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|')
- self.checkequal(['a|b|c|d'], 'a|b|c|d', 'split', '|', 0)
- self.checkequal(['a', 'b|c|d'], 'a|b|c|d', 'split', '|', 1)
- self.checkequal(['a', 'b', 'c|d'], 'a|b|c|d', 'split', '|', 2)
- self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|', 3)
- self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|', 4)
- self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|',
- sys.maxsize-2)
- self.checkequal(['a|b|c|d'], 'a|b|c|d', 'split', '|', 0)
- self.checkequal(['a', '', 'b||c||d'], 'a||b||c||d', 'split', '|', 2)
- self.checkequal(['endcase ', ''], 'endcase |', 'split', '|')
- self.checkequal(['', ' startcase'], '| startcase', 'split', '|')
- self.checkequal(['', 'bothcase', ''], '|bothcase|', 'split', '|')
- self.checkequal(['a', '', 'b\x00c\x00d'], 'a\x00\x00b\x00c\x00d', 'split', '\x00', 2)
-
- self.checkequal(['a']*20, ('a|'*20)[:-1], 'split', '|')
- self.checkequal(['a']*15 +['a|a|a|a|a'],
- ('a|'*20)[:-1], 'split', '|', 15)
-
- # by string
- self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//')
- self.checkequal(['a', 'b//c//d'], 'a//b//c//d', 'split', '//', 1)
- self.checkequal(['a', 'b', 'c//d'], 'a//b//c//d', 'split', '//', 2)
- self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//', 3)
- self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//', 4)
- self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//',
- sys.maxsize-10)
- self.checkequal(['a//b//c//d'], 'a//b//c//d', 'split', '//', 0)
- self.checkequal(['a', '', 'b////c////d'], 'a////b////c////d', 'split', '//', 2)
- self.checkequal(['endcase ', ''], 'endcase test', 'split', 'test')
- self.checkequal(['', ' begincase'], 'test begincase', 'split', 'test')
- self.checkequal(['', ' bothcase ', ''], 'test bothcase test',
- 'split', 'test')
- self.checkequal(['a', 'bc'], 'abbbc', 'split', 'bb')
- self.checkequal(['', ''], 'aaa', 'split', 'aaa')
- self.checkequal(['aaa'], 'aaa', 'split', 'aaa', 0)
- self.checkequal(['ab', 'ab'], 'abbaab', 'split', 'ba')
- self.checkequal(['aaaa'], 'aaaa', 'split', 'aab')
- self.checkequal([''], '', 'split', 'aaa')
- self.checkequal(['aa'], 'aa', 'split', 'aaa')
- self.checkequal(['A', 'bobb'], 'Abbobbbobb', 'split', 'bbobb')
- self.checkequal(['A', 'B', ''], 'AbbobbBbbobb', 'split', 'bbobb')
-
- self.checkequal(['a']*20, ('aBLAH'*20)[:-4], 'split', 'BLAH')
- self.checkequal(['a']*20, ('aBLAH'*20)[:-4], 'split', 'BLAH', 19)
- self.checkequal(['a']*18 + ['aBLAHa'], ('aBLAH'*20)[:-4],
- 'split', 'BLAH', 18)
-
- # with keyword args
- self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', sep='|')
- self.checkequal(['a', 'b|c|d'],
- 'a|b|c|d', 'split', '|', maxsplit=1)
- self.checkequal(['a', 'b|c|d'],
- 'a|b|c|d', 'split', sep='|', maxsplit=1)
- self.checkequal(['a', 'b|c|d'],
- 'a|b|c|d', 'split', maxsplit=1, sep='|')
- self.checkequal(['a', 'b c d'],
- 'a b c d', 'split', maxsplit=1)
-
- # argument type
- self.checkraises(TypeError, 'hello', 'split', 42, 42, 42)
-
- # null case
- self.checkraises(ValueError, 'hello', 'split', '')
- self.checkraises(ValueError, 'hello', 'split', '', 0)
-
- def test_rsplit(self):
- # by a char
- self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|')
- self.checkequal(['a|b|c', 'd'], 'a|b|c|d', 'rsplit', '|', 1)
- self.checkequal(['a|b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 2)
- self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 3)
- self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 4)
- self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|',
- sys.maxsize-100)
- self.checkequal(['a|b|c|d'], 'a|b|c|d', 'rsplit', '|', 0)
- self.checkequal(['a||b||c', '', 'd'], 'a||b||c||d', 'rsplit', '|', 2)
- self.checkequal(['', ' begincase'], '| begincase', 'rsplit', '|')
- self.checkequal(['endcase ', ''], 'endcase |', 'rsplit', '|')
- self.checkequal(['', 'bothcase', ''], '|bothcase|', 'rsplit', '|')
-
- self.checkequal(['a\x00\x00b', 'c', 'd'], 'a\x00\x00b\x00c\x00d', 'rsplit', '\x00', 2)
-
- self.checkequal(['a']*20, ('a|'*20)[:-1], 'rsplit', '|')
- self.checkequal(['a|a|a|a|a']+['a']*15,
- ('a|'*20)[:-1], 'rsplit', '|', 15)
-
- # by string
- self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//')
- self.checkequal(['a//b//c', 'd'], 'a//b//c//d', 'rsplit', '//', 1)
- self.checkequal(['a//b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 2)
- self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 3)
- self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 4)
- self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//',
- sys.maxsize-5)
- self.checkequal(['a//b//c//d'], 'a//b//c//d', 'rsplit', '//', 0)
- self.checkequal(['a////b////c', '', 'd'], 'a////b////c////d', 'rsplit', '//', 2)
- self.checkequal(['', ' begincase'], 'test begincase', 'rsplit', 'test')
- self.checkequal(['endcase ', ''], 'endcase test', 'rsplit', 'test')
- self.checkequal(['', ' bothcase ', ''], 'test bothcase test',
- 'rsplit', 'test')
- self.checkequal(['ab', 'c'], 'abbbc', 'rsplit', 'bb')
- self.checkequal(['', ''], 'aaa', 'rsplit', 'aaa')
- self.checkequal(['aaa'], 'aaa', 'rsplit', 'aaa', 0)
- self.checkequal(['ab', 'ab'], 'abbaab', 'rsplit', 'ba')
- self.checkequal(['aaaa'], 'aaaa', 'rsplit', 'aab')
- self.checkequal([''], '', 'rsplit', 'aaa')
- self.checkequal(['aa'], 'aa', 'rsplit', 'aaa')
- self.checkequal(['bbob', 'A'], 'bbobbbobbA', 'rsplit', 'bbobb')
- self.checkequal(['', 'B', 'A'], 'bbobbBbbobbA', 'rsplit', 'bbobb')
-
- self.checkequal(['a']*20, ('aBLAH'*20)[:-4], 'rsplit', 'BLAH')
- self.checkequal(['a']*20, ('aBLAH'*20)[:-4], 'rsplit', 'BLAH', 19)
- self.checkequal(['aBLAHa'] + ['a']*18, ('aBLAH'*20)[:-4],
- 'rsplit', 'BLAH', 18)
-
- # with keyword args
- self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', sep='|')
- self.checkequal(['a|b|c', 'd'],
- 'a|b|c|d', 'rsplit', '|', maxsplit=1)
- self.checkequal(['a|b|c', 'd'],
- 'a|b|c|d', 'rsplit', sep='|', maxsplit=1)
- self.checkequal(['a|b|c', 'd'],
- 'a|b|c|d', 'rsplit', maxsplit=1, sep='|')
- self.checkequal(['a b c', 'd'],
- 'a b c d', 'rsplit', maxsplit=1)
-
- # argument type
- self.checkraises(TypeError, 'hello', 'rsplit', 42, 42, 42)
-
- # null case
- self.checkraises(ValueError, 'hello', 'rsplit', '')
- self.checkraises(ValueError, 'hello', 'rsplit', '', 0)
-
- def test_replace(self):
- EQ = self.checkequal
-
- # Operations on the empty string
- EQ("", "", "replace", "", "")
- EQ("A", "", "replace", "", "A")
- EQ("", "", "replace", "A", "")
- EQ("", "", "replace", "A", "A")
- EQ("", "", "replace", "", "", 100)
- EQ("", "", "replace", "", "", sys.maxsize)
-
- # interleave (from=="", 'to' gets inserted everywhere)
- EQ("A", "A", "replace", "", "")
- EQ("*A*", "A", "replace", "", "*")
- EQ("*1A*1", "A", "replace", "", "*1")
- EQ("*-#A*-#", "A", "replace", "", "*-#")
- EQ("*-A*-A*-", "AA", "replace", "", "*-")
- EQ("*-A*-A*-", "AA", "replace", "", "*-", -1)
- EQ("*-A*-A*-", "AA", "replace", "", "*-", sys.maxsize)
- EQ("*-A*-A*-", "AA", "replace", "", "*-", 4)
- EQ("*-A*-A*-", "AA", "replace", "", "*-", 3)
- EQ("*-A*-A", "AA", "replace", "", "*-", 2)
- EQ("*-AA", "AA", "replace", "", "*-", 1)
- EQ("AA", "AA", "replace", "", "*-", 0)
-
- # single character deletion (from=="A", to=="")
- EQ("", "A", "replace", "A", "")
- EQ("", "AAA", "replace", "A", "")
- EQ("", "AAA", "replace", "A", "", -1)
- EQ("", "AAA", "replace", "A", "", sys.maxsize)
- EQ("", "AAA", "replace", "A", "", 4)
- EQ("", "AAA", "replace", "A", "", 3)
- EQ("A", "AAA", "replace", "A", "", 2)
- EQ("AA", "AAA", "replace", "A", "", 1)
- EQ("AAA", "AAA", "replace", "A", "", 0)
- EQ("", "AAAAAAAAAA", "replace", "A", "")
- EQ("BCD", "ABACADA", "replace", "A", "")
- EQ("BCD", "ABACADA", "replace", "A", "", -1)
- EQ("BCD", "ABACADA", "replace", "A", "", sys.maxsize)
- EQ("BCD", "ABACADA", "replace", "A", "", 5)
- EQ("BCD", "ABACADA", "replace", "A", "", 4)
- EQ("BCDA", "ABACADA", "replace", "A", "", 3)
- EQ("BCADA", "ABACADA", "replace", "A", "", 2)
- EQ("BACADA", "ABACADA", "replace", "A", "", 1)
- EQ("ABACADA", "ABACADA", "replace", "A", "", 0)
- EQ("BCD", "ABCAD", "replace", "A", "")
- EQ("BCD", "ABCADAA", "replace", "A", "")
- EQ("BCD", "BCD", "replace", "A", "")
- EQ("*************", "*************", "replace", "A", "")
- EQ("^A^", "^"+"A"*1000+"^", "replace", "A", "", 999)
-
- # substring deletion (from=="the", to=="")
- EQ("", "the", "replace", "the", "")
- EQ("ater", "theater", "replace", "the", "")
- EQ("", "thethe", "replace", "the", "")
- EQ("", "thethethethe", "replace", "the", "")
- EQ("aaaa", "theatheatheathea", "replace", "the", "")
- EQ("that", "that", "replace", "the", "")
- EQ("thaet", "thaet", "replace", "the", "")
- EQ("here and re", "here and there", "replace", "the", "")
- EQ("here and re and re", "here and there and there",
- "replace", "the", "", sys.maxsize)
- EQ("here and re and re", "here and there and there",
- "replace", "the", "", -1)
- EQ("here and re and re", "here and there and there",
- "replace", "the", "", 3)
- EQ("here and re and re", "here and there and there",
- "replace", "the", "", 2)
- EQ("here and re and there", "here and there and there",
- "replace", "the", "", 1)
- EQ("here and there and there", "here and there and there",
- "replace", "the", "", 0)
- EQ("here and re and re", "here and there and there", "replace", "the", "")
-
- EQ("abc", "abc", "replace", "the", "")
- EQ("abcdefg", "abcdefg", "replace", "the", "")
-
- # substring deletion (from=="bob", to=="")
- EQ("bob", "bbobob", "replace", "bob", "")
- EQ("bobXbob", "bbobobXbbobob", "replace", "bob", "")
- EQ("aaaaaaa", "aaaaaaabob", "replace", "bob", "")
- EQ("aaaaaaa", "aaaaaaa", "replace", "bob", "")
-
- # single character replace in place (len(from)==len(to)==1)
- EQ("Who goes there?", "Who goes there?", "replace", "o", "o")
- EQ("WhO gOes there?", "Who goes there?", "replace", "o", "O")
- EQ("WhO gOes there?", "Who goes there?", "replace", "o", "O", sys.maxsize)
- EQ("WhO gOes there?", "Who goes there?", "replace", "o", "O", -1)
- EQ("WhO gOes there?", "Who goes there?", "replace", "o", "O", 3)
- EQ("WhO gOes there?", "Who goes there?", "replace", "o", "O", 2)
- EQ("WhO goes there?", "Who goes there?", "replace", "o", "O", 1)
- EQ("Who goes there?", "Who goes there?", "replace", "o", "O", 0)
-
- EQ("Who goes there?", "Who goes there?", "replace", "a", "q")
- EQ("who goes there?", "Who goes there?", "replace", "W", "w")
- EQ("wwho goes there?ww", "WWho goes there?WW", "replace", "W", "w")
- EQ("Who goes there!", "Who goes there?", "replace", "?", "!")
- EQ("Who goes there!!", "Who goes there??", "replace", "?", "!")
-
- EQ("Who goes there?", "Who goes there?", "replace", ".", "!")
-
- # substring replace in place (len(from)==len(to) > 1)
- EQ("Th** ** a t**sue", "This is a tissue", "replace", "is", "**")
- EQ("Th** ** a t**sue", "This is a tissue", "replace", "is", "**", sys.maxsize)
- EQ("Th** ** a t**sue", "This is a tissue", "replace", "is", "**", -1)
- EQ("Th** ** a t**sue", "This is a tissue", "replace", "is", "**", 4)
- EQ("Th** ** a t**sue", "This is a tissue", "replace", "is", "**", 3)
- EQ("Th** ** a tissue", "This is a tissue", "replace", "is", "**", 2)
- EQ("Th** is a tissue", "This is a tissue", "replace", "is", "**", 1)
- EQ("This is a tissue", "This is a tissue", "replace", "is", "**", 0)
- EQ("cobob", "bobob", "replace", "bob", "cob")
- EQ("cobobXcobocob", "bobobXbobobob", "replace", "bob", "cob")
- EQ("bobob", "bobob", "replace", "bot", "bot")
-
- # replace single character (len(from)==1, len(to)>1)
- EQ("ReyKKjaviKK", "Reykjavik", "replace", "k", "KK")
- EQ("ReyKKjaviKK", "Reykjavik", "replace", "k", "KK", -1)
- EQ("ReyKKjaviKK", "Reykjavik", "replace", "k", "KK", sys.maxsize)
- EQ("ReyKKjaviKK", "Reykjavik", "replace", "k", "KK", 2)
- EQ("ReyKKjavik", "Reykjavik", "replace", "k", "KK", 1)
- EQ("Reykjavik", "Reykjavik", "replace", "k", "KK", 0)
- EQ("A----B----C----", "A.B.C.", "replace", ".", "----")
- # issue #15534
- EQ('...\u043c......<', '...\u043c......<', "replace", "<", "<")
-
- EQ("Reykjavik", "Reykjavik", "replace", "q", "KK")
-
- # replace substring (len(from)>1, len(to)!=len(from))
- EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam",
- "replace", "spam", "ham")
- EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam",
- "replace", "spam", "ham", sys.maxsize)
- EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam",
- "replace", "spam", "ham", -1)
- EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam",
- "replace", "spam", "ham", 4)
- EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam",
- "replace", "spam", "ham", 3)
- EQ("ham, ham, eggs and spam", "spam, spam, eggs and spam",
- "replace", "spam", "ham", 2)
- EQ("ham, spam, eggs and spam", "spam, spam, eggs and spam",
- "replace", "spam", "ham", 1)
- EQ("spam, spam, eggs and spam", "spam, spam, eggs and spam",
- "replace", "spam", "ham", 0)
-
- EQ("bobob", "bobobob", "replace", "bobob", "bob")
- EQ("bobobXbobob", "bobobobXbobobob", "replace", "bobob", "bob")
- EQ("BOBOBOB", "BOBOBOB", "replace", "bob", "bobby")
-
- # XXX Commented out. Is there any reason to support buffer objects
- # as arguments for str.replace()? GvR
-## ba = bytearray('a')
-## bb = bytearray('b')
-## EQ("bbc", "abc", "replace", ba, bb)
-## EQ("aac", "abc", "replace", bb, ba)
-
- #
- self.checkequal('one@two!three!', 'one!two!three!', 'replace', '!', '@', 1)
- self.checkequal('onetwothree', 'one!two!three!', 'replace', '!', '')
- self.checkequal('one@two@three!', 'one!two!three!', 'replace', '!', '@', 2)
- self.checkequal('one@two@three@', 'one!two!three!', 'replace', '!', '@', 3)
- self.checkequal('one@two@three@', 'one!two!three!', 'replace', '!', '@', 4)
- self.checkequal('one!two!three!', 'one!two!three!', 'replace', '!', '@', 0)
- self.checkequal('one@two@three@', 'one!two!three!', 'replace', '!', '@')
- self.checkequal('one!two!three!', 'one!two!three!', 'replace', 'x', '@')
- self.checkequal('one!two!three!', 'one!two!three!', 'replace', 'x', '@', 2)
- self.checkequal('-a-b-c-', 'abc', 'replace', '', '-')
- self.checkequal('-a-b-c', 'abc', 'replace', '', '-', 3)
- self.checkequal('abc', 'abc', 'replace', '', '-', 0)
- self.checkequal('', '', 'replace', '', '')
- self.checkequal('abc', 'abc', 'replace', 'ab', '--', 0)
- self.checkequal('abc', 'abc', 'replace', 'xy', '--')
- # Next three for SF bug 422088: [OSF1 alpha] string.replace(); died with
- # MemoryError due to empty result (platform malloc issue when requesting
- # 0 bytes).
- self.checkequal('', '123', 'replace', '123', '')
- self.checkequal('', '123123', 'replace', '123', '')
- self.checkequal('x', '123x123', 'replace', '123', '')
-
- self.checkraises(TypeError, 'hello', 'replace')
- self.checkraises(TypeError, 'hello', 'replace', 42)
- self.checkraises(TypeError, 'hello', 'replace', 42, 'h')
- self.checkraises(TypeError, 'hello', 'replace', 'h', 42)
-
- def test_replace_overflow(self):
- # Check for overflow checking on 32 bit machines
- if sys.maxsize != 2147483647 or struct.calcsize("P") > 4:
- return
- A2_16 = "A" * (2**16)
- self.checkraises(OverflowError, A2_16, "replace", "", A2_16)
- self.checkraises(OverflowError, A2_16, "replace", "A", A2_16)
- self.checkraises(OverflowError, A2_16, "replace", "AA", A2_16+A2_16)
-
-
-
-class CommonTest(BaseTest):
- # This testcase contains test that can be used in all
- # stringlike classes. Currently this is str, unicode
- # UserString and the string module.
-
- def test_hash(self):
- # SF bug 1054139: += optimization was not invalidating cached hash value
- a = self.type2test('DNSSEC')
- b = self.type2test('')
- for c in a:
- b += c
- hash(b)
- self.assertEqual(hash(a), hash(b))
-
- def test_capitalize(self):
- self.checkequal(' hello ', ' hello ', 'capitalize')
- self.checkequal('Hello ', 'Hello ','capitalize')
- self.checkequal('Hello ', 'hello ','capitalize')
- self.checkequal('Aaaa', 'aaaa', 'capitalize')
- self.checkequal('Aaaa', 'AaAa', 'capitalize')
-
- # check that titlecased chars are lowered correctly
- # \u1ffc is the titlecased char
- self.checkequal('\u03a9\u0399\u1ff3\u1ff3\u1ff3',
- '\u1ff3\u1ff3\u1ffc\u1ffc', 'capitalize')
- # check with cased non-letter chars
- self.checkequal('\u24c5\u24e8\u24e3\u24d7\u24de\u24dd',
- '\u24c5\u24ce\u24c9\u24bd\u24c4\u24c3', 'capitalize')
- self.checkequal('\u24c5\u24e8\u24e3\u24d7\u24de\u24dd',
- '\u24df\u24e8\u24e3\u24d7\u24de\u24dd', 'capitalize')
- self.checkequal('\u2160\u2171\u2172',
- '\u2160\u2161\u2162', 'capitalize')
- self.checkequal('\u2160\u2171\u2172',
- '\u2170\u2171\u2172', 'capitalize')
- # check with Ll chars with no upper - nothing changes here
- self.checkequal('\u019b\u1d00\u1d86\u0221\u1fb7',
- '\u019b\u1d00\u1d86\u0221\u1fb7', 'capitalize')
-
- self.checkraises(TypeError, 'hello', 'capitalize', 42)
-
- def test_lower(self):
- self.checkequal('hello', 'HeLLo', 'lower')
- self.checkequal('hello', 'hello', 'lower')
- self.checkraises(TypeError, 'hello', 'lower', 42)
-
- def test_upper(self):
- self.checkequal('HELLO', 'HeLLo', 'upper')
- self.checkequal('HELLO', 'HELLO', 'upper')
- self.checkraises(TypeError, 'hello', 'upper', 42)
-
- def test_expandtabs(self):
- self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs')
- self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs', 8)
- self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs', 4)
- self.checkequal('abc\r\nab def\ng hi', 'abc\r\nab\tdef\ng\thi', 'expandtabs', 4)
- self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs')
- self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs', 8)
- self.checkequal('abc\r\nab\r\ndef\ng\r\nhi', 'abc\r\nab\r\ndef\ng\r\nhi', 'expandtabs', 4)
-
- self.checkraises(TypeError, 'hello', 'expandtabs', 42, 42)
-
- def test_additional_split(self):
- self.checkequal(['this', 'is', 'the', 'split', 'function'],
- 'this is the split function', 'split')
-
- # by whitespace
- self.checkequal(['a', 'b', 'c', 'd'], 'a b c d ', 'split')
- self.checkequal(['a', 'b c d'], 'a b c d', 'split', None, 1)
- self.checkequal(['a', 'b', 'c d'], 'a b c d', 'split', None, 2)
- self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'split', None, 3)
- self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'split', None, 4)
- self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'split', None,
- sys.maxsize-1)
- self.checkequal(['a b c d'], 'a b c d', 'split', None, 0)
- self.checkequal(['a b c d'], ' a b c d', 'split', None, 0)
- self.checkequal(['a', 'b', 'c d'], 'a b c d', 'split', None, 2)
-
- self.checkequal([], ' ', 'split')
- self.checkequal(['a'], ' a ', 'split')
- self.checkequal(['a', 'b'], ' a b ', 'split')
- self.checkequal(['a', 'b '], ' a b ', 'split', None, 1)
- self.checkequal(['a', 'b c '], ' a b c ', 'split', None, 1)
- self.checkequal(['a', 'b', 'c '], ' a b c ', 'split', None, 2)
- self.checkequal(['a', 'b'], '\n\ta \t\r b \v ', 'split')
- aaa = ' a '*20
- self.checkequal(['a']*20, aaa, 'split')
- self.checkequal(['a'] + [aaa[4:]], aaa, 'split', None, 1)
- self.checkequal(['a']*19 + ['a '], aaa, 'split', None, 19)
-
- # mixed use of str and unicode
- self.checkequal(['a', 'b', 'c d'], 'a b c d', 'split', ' ', 2)
-
- def test_additional_rsplit(self):
- self.checkequal(['this', 'is', 'the', 'rsplit', 'function'],
- 'this is the rsplit function', 'rsplit')
-
- # by whitespace
- self.checkequal(['a', 'b', 'c', 'd'], 'a b c d ', 'rsplit')
- self.checkequal(['a b c', 'd'], 'a b c d', 'rsplit', None, 1)
- self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', None, 2)
- self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None, 3)
- self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None, 4)
- self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None,
- sys.maxsize-20)
- self.checkequal(['a b c d'], 'a b c d', 'rsplit', None, 0)
- self.checkequal(['a b c d'], 'a b c d ', 'rsplit', None, 0)
- self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', None, 2)
-
- self.checkequal([], ' ', 'rsplit')
- self.checkequal(['a'], ' a ', 'rsplit')
- self.checkequal(['a', 'b'], ' a b ', 'rsplit')
- self.checkequal([' a', 'b'], ' a b ', 'rsplit', None, 1)
- self.checkequal([' a b','c'], ' a b c ', 'rsplit',
- None, 1)
- self.checkequal([' a', 'b', 'c'], ' a b c ', 'rsplit',
- None, 2)
- self.checkequal(['a', 'b'], '\n\ta \t\r b \v ', 'rsplit', None, 88)
- aaa = ' a '*20
- self.checkequal(['a']*20, aaa, 'rsplit')
- self.checkequal([aaa[:-4]] + ['a'], aaa, 'rsplit', None, 1)
- self.checkequal([' a a'] + ['a']*18, aaa, 'rsplit', None, 18)
-
- # mixed use of str and unicode
- self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', ' ', 2)
-
- def test_strip(self):
- self.checkequal('hello', ' hello ', 'strip')
- self.checkequal('hello ', ' hello ', 'lstrip')
- self.checkequal(' hello', ' hello ', 'rstrip')
- self.checkequal('hello', 'hello', 'strip')
-
- # strip/lstrip/rstrip with None arg
- self.checkequal('hello', ' hello ', 'strip', None)
- self.checkequal('hello ', ' hello ', 'lstrip', None)
- self.checkequal(' hello', ' hello ', 'rstrip', None)
- self.checkequal('hello', 'hello', 'strip', None)
-
- # strip/lstrip/rstrip with str arg
- self.checkequal('hello', 'xyzzyhelloxyzzy', 'strip', 'xyz')
- self.checkequal('helloxyzzy', 'xyzzyhelloxyzzy', 'lstrip', 'xyz')
- self.checkequal('xyzzyhello', 'xyzzyhelloxyzzy', 'rstrip', 'xyz')
- self.checkequal('hello', 'hello', 'strip', 'xyz')
-
- self.checkraises(TypeError, 'hello', 'strip', 42, 42)
- self.checkraises(TypeError, 'hello', 'lstrip', 42, 42)
- self.checkraises(TypeError, 'hello', 'rstrip', 42, 42)
-
- def test_ljust(self):
- self.checkequal('abc ', 'abc', 'ljust', 10)
- self.checkequal('abc ', 'abc', 'ljust', 6)
- self.checkequal('abc', 'abc', 'ljust', 3)
- self.checkequal('abc', 'abc', 'ljust', 2)
- self.checkequal('abc*******', 'abc', 'ljust', 10, '*')
- self.checkraises(TypeError, 'abc', 'ljust')
-
- def test_rjust(self):
- self.checkequal(' abc', 'abc', 'rjust', 10)
- self.checkequal(' abc', 'abc', 'rjust', 6)
- self.checkequal('abc', 'abc', 'rjust', 3)
- self.checkequal('abc', 'abc', 'rjust', 2)
- self.checkequal('*******abc', 'abc', 'rjust', 10, '*')
- self.checkraises(TypeError, 'abc', 'rjust')
-
- def test_center(self):
- self.checkequal(' abc ', 'abc', 'center', 10)
- self.checkequal(' abc ', 'abc', 'center', 6)
- self.checkequal('abc', 'abc', 'center', 3)
- self.checkequal('abc', 'abc', 'center', 2)
- self.checkequal('***abc****', 'abc', 'center', 10, '*')
- self.checkraises(TypeError, 'abc', 'center')
-
- def test_swapcase(self):
- self.checkequal('hEllO CoMPuTErS', 'HeLLo cOmpUteRs', 'swapcase')
-
- self.checkraises(TypeError, 'hello', 'swapcase', 42)
-
- def test_zfill(self):
- self.checkequal('123', '123', 'zfill', 2)
- self.checkequal('123', '123', 'zfill', 3)
- self.checkequal('0123', '123', 'zfill', 4)
- self.checkequal('+123', '+123', 'zfill', 3)
- self.checkequal('+123', '+123', 'zfill', 4)
- self.checkequal('+0123', '+123', 'zfill', 5)
- self.checkequal('-123', '-123', 'zfill', 3)
- self.checkequal('-123', '-123', 'zfill', 4)
- self.checkequal('-0123', '-123', 'zfill', 5)
- self.checkequal('000', '', 'zfill', 3)
- self.checkequal('34', '34', 'zfill', 1)
- self.checkequal('0034', '34', 'zfill', 4)
-
- self.checkraises(TypeError, '123', 'zfill')
-
-class MixinStrUnicodeUserStringTest(object):
- # additional tests that only work for
- # stringlike objects, i.e. str, unicode, UserString
- # (but not the string module)
-
- def test_islower(self):
- self.checkequal(False, '', 'islower')
- self.checkequal(True, 'a', 'islower')
- self.checkequal(False, 'A', 'islower')
- self.checkequal(False, '\n', 'islower')
- self.checkequal(True, 'abc', 'islower')
- self.checkequal(False, 'aBc', 'islower')
- self.checkequal(True, 'abc\n', 'islower')
- self.checkraises(TypeError, 'abc', 'islower', 42)
-
- def test_isupper(self):
- self.checkequal(False, '', 'isupper')
- self.checkequal(False, 'a', 'isupper')
- self.checkequal(True, 'A', 'isupper')
- self.checkequal(False, '\n', 'isupper')
- self.checkequal(True, 'ABC', 'isupper')
- self.checkequal(False, 'AbC', 'isupper')
- self.checkequal(True, 'ABC\n', 'isupper')
- self.checkraises(TypeError, 'abc', 'isupper', 42)
-
- def test_istitle(self):
- self.checkequal(False, '', 'istitle')
- self.checkequal(False, 'a', 'istitle')
- self.checkequal(True, 'A', 'istitle')
- self.checkequal(False, '\n', 'istitle')
- self.checkequal(True, 'A Titlecased Line', 'istitle')
- self.checkequal(True, 'A\nTitlecased Line', 'istitle')
- self.checkequal(True, 'A Titlecased, Line', 'istitle')
- self.checkequal(False, 'Not a capitalized String', 'istitle')
- self.checkequal(False, 'Not\ta Titlecase String', 'istitle')
- self.checkequal(False, 'Not--a Titlecase String', 'istitle')
- self.checkequal(False, 'NOT', 'istitle')
- self.checkraises(TypeError, 'abc', 'istitle', 42)
-
- def test_isspace(self):
- self.checkequal(False, '', 'isspace')
- self.checkequal(False, 'a', 'isspace')
- self.checkequal(True, ' ', 'isspace')
- self.checkequal(True, '\t', 'isspace')
- self.checkequal(True, '\r', 'isspace')
- self.checkequal(True, '\n', 'isspace')
- self.checkequal(True, ' \t\r\n', 'isspace')
- self.checkequal(False, ' \t\r\na', 'isspace')
- self.checkraises(TypeError, 'abc', 'isspace', 42)
-
- def test_isalpha(self):
- self.checkequal(False, '', 'isalpha')
- self.checkequal(True, 'a', 'isalpha')
- self.checkequal(True, 'A', 'isalpha')
- self.checkequal(False, '\n', 'isalpha')
- self.checkequal(True, 'abc', 'isalpha')
- self.checkequal(False, 'aBc123', 'isalpha')
- self.checkequal(False, 'abc\n', 'isalpha')
- self.checkraises(TypeError, 'abc', 'isalpha', 42)
-
- def test_isalnum(self):
- self.checkequal(False, '', 'isalnum')
- self.checkequal(True, 'a', 'isalnum')
- self.checkequal(True, 'A', 'isalnum')
- self.checkequal(False, '\n', 'isalnum')
- self.checkequal(True, '123abc456', 'isalnum')
- self.checkequal(True, 'a1b3c', 'isalnum')
- self.checkequal(False, 'aBc000 ', 'isalnum')
- self.checkequal(False, 'abc\n', 'isalnum')
- self.checkraises(TypeError, 'abc', 'isalnum', 42)
-
- def test_isdigit(self):
- self.checkequal(False, '', 'isdigit')
- self.checkequal(False, 'a', 'isdigit')
- self.checkequal(True, '0', 'isdigit')
- self.checkequal(True, '0123456789', 'isdigit')
- self.checkequal(False, '0123456789a', 'isdigit')
-
- self.checkraises(TypeError, 'abc', 'isdigit', 42)
-
- def test_title(self):
- self.checkequal(' Hello ', ' hello ', 'title')
- self.checkequal('Hello ', 'hello ', 'title')
- self.checkequal('Hello ', 'Hello ', 'title')
- self.checkequal('Format This As Title String', "fOrMaT thIs aS titLe String", 'title')
- self.checkequal('Format,This-As*Title;String', "fOrMaT,thIs-aS*titLe;String", 'title', )
- self.checkequal('Getint', "getInt", 'title')
- self.checkraises(TypeError, 'hello', 'title', 42)
-
- def test_splitlines(self):
- self.checkequal(['abc', 'def', '', 'ghi'], "abc\ndef\n\rghi", 'splitlines')
- self.checkequal(['abc', 'def', '', 'ghi'], "abc\ndef\n\r\nghi", 'splitlines')
- self.checkequal(['abc', 'def', 'ghi'], "abc\ndef\r\nghi", 'splitlines')
- self.checkequal(['abc', 'def', 'ghi'], "abc\ndef\r\nghi\n", 'splitlines')
- self.checkequal(['abc', 'def', 'ghi', ''], "abc\ndef\r\nghi\n\r", 'splitlines')
- self.checkequal(['', 'abc', 'def', 'ghi', ''], "\nabc\ndef\r\nghi\n\r", 'splitlines')
- self.checkequal(['', 'abc', 'def', 'ghi', ''],
- "\nabc\ndef\r\nghi\n\r", 'splitlines', False)
- self.checkequal(['\n', 'abc\n', 'def\r\n', 'ghi\n', '\r'],
- "\nabc\ndef\r\nghi\n\r", 'splitlines', True)
- self.checkequal(['', 'abc', 'def', 'ghi', ''], "\nabc\ndef\r\nghi\n\r",
- 'splitlines', keepends=False)
- self.checkequal(['\n', 'abc\n', 'def\r\n', 'ghi\n', '\r'],
- "\nabc\ndef\r\nghi\n\r", 'splitlines', keepends=True)
-
- self.checkraises(TypeError, 'abc', 'splitlines', 42, 42)
-
- def test_startswith(self):
- self.checkequal(True, 'hello', 'startswith', 'he')
- self.checkequal(True, 'hello', 'startswith', 'hello')
- self.checkequal(False, 'hello', 'startswith', 'hello world')
- self.checkequal(True, 'hello', 'startswith', '')
- self.checkequal(False, 'hello', 'startswith', 'ello')
- self.checkequal(True, 'hello', 'startswith', 'ello', 1)
- self.checkequal(True, 'hello', 'startswith', 'o', 4)
- self.checkequal(False, 'hello', 'startswith', 'o', 5)
- self.checkequal(True, 'hello', 'startswith', '', 5)
- self.checkequal(False, 'hello', 'startswith', 'lo', 6)
- self.checkequal(True, 'helloworld', 'startswith', 'lowo', 3)
- self.checkequal(True, 'helloworld', 'startswith', 'lowo', 3, 7)
- self.checkequal(False, 'helloworld', 'startswith', 'lowo', 3, 6)
-
- # test negative indices
- self.checkequal(True, 'hello', 'startswith', 'he', 0, -1)
- self.checkequal(True, 'hello', 'startswith', 'he', -53, -1)
- self.checkequal(False, 'hello', 'startswith', 'hello', 0, -1)
- self.checkequal(False, 'hello', 'startswith', 'hello world', -1, -10)
- self.checkequal(False, 'hello', 'startswith', 'ello', -5)
- self.checkequal(True, 'hello', 'startswith', 'ello', -4)
- self.checkequal(False, 'hello', 'startswith', 'o', -2)
- self.checkequal(True, 'hello', 'startswith', 'o', -1)
- self.checkequal(True, 'hello', 'startswith', '', -3, -3)
- self.checkequal(False, 'hello', 'startswith', 'lo', -9)
-
- self.checkraises(TypeError, 'hello', 'startswith')
- self.checkraises(TypeError, 'hello', 'startswith', 42)
-
- # test tuple arguments
- self.checkequal(True, 'hello', 'startswith', ('he', 'ha'))
- self.checkequal(False, 'hello', 'startswith', ('lo', 'llo'))
- self.checkequal(True, 'hello', 'startswith', ('hellox', 'hello'))
- self.checkequal(False, 'hello', 'startswith', ())
- self.checkequal(True, 'helloworld', 'startswith', ('hellowo',
- 'rld', 'lowo'), 3)
- self.checkequal(False, 'helloworld', 'startswith', ('hellowo', 'ello',
- 'rld'), 3)
- self.checkequal(True, 'hello', 'startswith', ('lo', 'he'), 0, -1)
- self.checkequal(False, 'hello', 'startswith', ('he', 'hel'), 0, 1)
- self.checkequal(True, 'hello', 'startswith', ('he', 'hel'), 0, 2)
-
- self.checkraises(TypeError, 'hello', 'startswith', (42,))
-
- def test_endswith(self):
- self.checkequal(True, 'hello', 'endswith', 'lo')
- self.checkequal(False, 'hello', 'endswith', 'he')
- self.checkequal(True, 'hello', 'endswith', '')
- self.checkequal(False, 'hello', 'endswith', 'hello world')
- self.checkequal(False, 'helloworld', 'endswith', 'worl')
- self.checkequal(True, 'helloworld', 'endswith', 'worl', 3, 9)
- self.checkequal(True, 'helloworld', 'endswith', 'world', 3, 12)
- self.checkequal(True, 'helloworld', 'endswith', 'lowo', 1, 7)
- self.checkequal(True, 'helloworld', 'endswith', 'lowo', 2, 7)
- self.checkequal(True, 'helloworld', 'endswith', 'lowo', 3, 7)
- self.checkequal(False, 'helloworld', 'endswith', 'lowo', 4, 7)
- self.checkequal(False, 'helloworld', 'endswith', 'lowo', 3, 8)
- self.checkequal(False, 'ab', 'endswith', 'ab', 0, 1)
- self.checkequal(False, 'ab', 'endswith', 'ab', 0, 0)
-
- # test negative indices
- self.checkequal(True, 'hello', 'endswith', 'lo', -2)
- self.checkequal(False, 'hello', 'endswith', 'he', -2)
- self.checkequal(True, 'hello', 'endswith', '', -3, -3)
- self.checkequal(False, 'hello', 'endswith', 'hello world', -10, -2)
- self.checkequal(False, 'helloworld', 'endswith', 'worl', -6)
- self.checkequal(True, 'helloworld', 'endswith', 'worl', -5, -1)
- self.checkequal(True, 'helloworld', 'endswith', 'worl', -5, 9)
- self.checkequal(True, 'helloworld', 'endswith', 'world', -7, 12)
- self.checkequal(True, 'helloworld', 'endswith', 'lowo', -99, -3)
- self.checkequal(True, 'helloworld', 'endswith', 'lowo', -8, -3)
- self.checkequal(True, 'helloworld', 'endswith', 'lowo', -7, -3)
- self.checkequal(False, 'helloworld', 'endswith', 'lowo', 3, -4)
- self.checkequal(False, 'helloworld', 'endswith', 'lowo', -8, -2)
-
- self.checkraises(TypeError, 'hello', 'endswith')
- self.checkraises(TypeError, 'hello', 'endswith', 42)
-
- # test tuple arguments
- self.checkequal(False, 'hello', 'endswith', ('he', 'ha'))
- self.checkequal(True, 'hello', 'endswith', ('lo', 'llo'))
- self.checkequal(True, 'hello', 'endswith', ('hellox', 'hello'))
- self.checkequal(False, 'hello', 'endswith', ())
- self.checkequal(True, 'helloworld', 'endswith', ('hellowo',
- 'rld', 'lowo'), 3)
- self.checkequal(False, 'helloworld', 'endswith', ('hellowo', 'ello',
- 'rld'), 3, -1)
- self.checkequal(True, 'hello', 'endswith', ('hell', 'ell'), 0, -1)
- self.checkequal(False, 'hello', 'endswith', ('he', 'hel'), 0, 1)
- self.checkequal(True, 'hello', 'endswith', ('he', 'hell'), 0, 4)
-
- self.checkraises(TypeError, 'hello', 'endswith', (42,))
-
- def test___contains__(self):
- self.checkequal(True, '', '__contains__', '')
- self.checkequal(True, 'abc', '__contains__', '')
- self.checkequal(False, 'abc', '__contains__', '\0')
- self.checkequal(True, '\0abc', '__contains__', '\0')
- self.checkequal(True, 'abc\0', '__contains__', '\0')
- self.checkequal(True, '\0abc', '__contains__', 'a')
- self.checkequal(True, 'asdf', '__contains__', 'asdf')
- self.checkequal(False, 'asd', '__contains__', 'asdf')
- self.checkequal(False, '', '__contains__', 'asdf')
-
- def test_subscript(self):
- self.checkequal('a', 'abc', '__getitem__', 0)
- self.checkequal('c', 'abc', '__getitem__', -1)
- self.checkequal('a', 'abc', '__getitem__', 0)
- self.checkequal('abc', 'abc', '__getitem__', slice(0, 3))
- self.checkequal('abc', 'abc', '__getitem__', slice(0, 1000))
- self.checkequal('a', 'abc', '__getitem__', slice(0, 1))
- self.checkequal('', 'abc', '__getitem__', slice(0, 0))
-
- self.checkraises(TypeError, 'abc', '__getitem__', 'def')
-
- def test_slice(self):
- self.checkequal('abc', 'abc', '__getitem__', slice(0, 1000))
- self.checkequal('abc', 'abc', '__getitem__', slice(0, 3))
- self.checkequal('ab', 'abc', '__getitem__', slice(0, 2))
- self.checkequal('bc', 'abc', '__getitem__', slice(1, 3))
- self.checkequal('b', 'abc', '__getitem__', slice(1, 2))
- self.checkequal('', 'abc', '__getitem__', slice(2, 2))
- self.checkequal('', 'abc', '__getitem__', slice(1000, 1000))
- self.checkequal('', 'abc', '__getitem__', slice(2000, 1000))
- self.checkequal('', 'abc', '__getitem__', slice(2, 1))
-
- self.checkraises(TypeError, 'abc', '__getitem__', 'def')
-
- def test_extended_getslice(self):
- # Test extended slicing by comparing with list slicing.
- s = string.ascii_letters + string.digits
- indices = (0, None, 1, 3, 41, -1, -2, -37)
- for start in indices:
- for stop in indices:
- # Skip step 0 (invalid)
- for step in indices[1:]:
- L = list(s)[start:stop:step]
- self.checkequal("".join(L), s, '__getitem__',
- slice(start, stop, step))
-
- def test_mul(self):
- self.checkequal('', 'abc', '__mul__', -1)
- self.checkequal('', 'abc', '__mul__', 0)
- self.checkequal('abc', 'abc', '__mul__', 1)
- self.checkequal('abcabcabc', 'abc', '__mul__', 3)
- self.checkraises(TypeError, 'abc', '__mul__')
- self.checkraises(TypeError, 'abc', '__mul__', '')
- # XXX: on a 64-bit system, this doesn't raise an overflow error,
- # but either raises a MemoryError, or succeeds (if you have 54TiB)
- #self.checkraises(OverflowError, 10000*'abc', '__mul__', 2000000000)
-
- def test_join(self):
- # join now works with any sequence type
- # moved here, because the argument order is
- # different in string.join (see the test in
- # test.test_string.StringTest.test_join)
- self.checkequal('a b c d', ' ', 'join', ['a', 'b', 'c', 'd'])
- self.checkequal('abcd', '', 'join', ('a', 'b', 'c', 'd'))
- self.checkequal('bd', '', 'join', ('', 'b', '', 'd'))
- self.checkequal('ac', '', 'join', ('a', '', 'c', ''))
- self.checkequal('w x y z', ' ', 'join', Sequence())
- self.checkequal('abc', 'a', 'join', ('abc',))
- self.checkequal('z', 'a', 'join', UserList(['z']))
- self.checkequal('a.b.c', '.', 'join', ['a', 'b', 'c'])
- self.assertRaises(TypeError, '.'.join, ['a', 'b', 3])
- for i in [5, 25, 125]:
- self.checkequal(((('a' * i) + '-') * i)[:-1], '-', 'join',
- ['a' * i] * i)
- self.checkequal(((('a' * i) + '-') * i)[:-1], '-', 'join',
- ('a' * i,) * i)
-
- #self.checkequal(str(BadSeq1()), ' ', 'join', BadSeq1())
- self.checkequal('a b c', ' ', 'join', BadSeq2())
-
- self.checkraises(TypeError, ' ', 'join')
- self.checkraises(TypeError, ' ', 'join', 7)
- self.checkraises(TypeError, ' ', 'join', [1, 2, bytes()])
- try:
- def f():
- yield 4 + ""
- self.fixtype(' ').join(f())
- except TypeError as e:
- if '+' not in str(e):
- self.fail('join() ate exception message')
- else:
- self.fail('exception not raised')
-
- def test_formatting(self):
- self.checkequal('+hello+', '+%s+', '__mod__', 'hello')
- self.checkequal('+10+', '+%d+', '__mod__', 10)
- self.checkequal('a', "%c", '__mod__', "a")
- self.checkequal('a', "%c", '__mod__', "a")
- self.checkequal('"', "%c", '__mod__', 34)
- self.checkequal('$', "%c", '__mod__', 36)
- self.checkequal('10', "%d", '__mod__', 10)
- self.checkequal('\x7f', "%c", '__mod__', 0x7f)
-
- for ordinal in (-100, 0x200000):
- # unicode raises ValueError, str raises OverflowError
- self.checkraises((ValueError, OverflowError), '%c', '__mod__', ordinal)
-
- longvalue = sys.maxsize + 10
- slongvalue = str(longvalue)
- self.checkequal(' 42', '%3ld', '__mod__', 42)
- self.checkequal('42', '%d', '__mod__', 42.0)
- self.checkequal(slongvalue, '%d', '__mod__', longvalue)
- self.checkcall('%d', '__mod__', float(longvalue))
- self.checkequal('0042.00', '%07.2f', '__mod__', 42)
- self.checkequal('0042.00', '%07.2F', '__mod__', 42)
-
- self.checkraises(TypeError, 'abc', '__mod__')
- self.checkraises(TypeError, '%(foo)s', '__mod__', 42)
- self.checkraises(TypeError, '%s%s', '__mod__', (42,))
- self.checkraises(TypeError, '%c', '__mod__', (None,))
- self.checkraises(ValueError, '%(foo', '__mod__', {})
- self.checkraises(TypeError, '%(foo)s %(bar)s', '__mod__', ('foo', 42))
- self.checkraises(TypeError, '%d', '__mod__', "42") # not numeric
- self.checkraises(TypeError, '%d', '__mod__', (42+0j)) # no int conversion provided
-
- # argument names with properly nested brackets are supported
- self.checkequal('bar', '%((foo))s', '__mod__', {'(foo)': 'bar'})
-
- # 100 is a magic number in PyUnicode_Format, this forces a resize
- self.checkequal(103*'a'+'x', '%sx', '__mod__', 103*'a')
-
- self.checkraises(TypeError, '%*s', '__mod__', ('foo', 'bar'))
- self.checkraises(TypeError, '%10.*f', '__mod__', ('foo', 42.))
- self.checkraises(ValueError, '%10', '__mod__', (42,))
-
- # Outrageously large width or precision should raise ValueError.
- self.checkraises(ValueError, '%%%df' % (2**64), '__mod__', (3.2))
- self.checkraises(ValueError, '%%.%df' % (2**64), '__mod__', (3.2))
-
- self.checkraises(OverflowError, '%*s', '__mod__',
- (_testcapi.PY_SSIZE_T_MAX + 1, ''))
- self.checkraises(OverflowError, '%.*f', '__mod__',
- (_testcapi.INT_MAX + 1, 1. / 7))
- # Issue 15989
- self.checkraises(OverflowError, '%*s', '__mod__',
- (1 << (_testcapi.PY_SSIZE_T_MAX.bit_length() + 1), ''))
- self.checkraises(OverflowError, '%.*f', '__mod__',
- (_testcapi.UINT_MAX + 1, 1. / 7))
-
- class X(object): pass
- self.checkraises(TypeError, 'abc', '__mod__', X())
-
- def test_floatformatting(self):
- # float formatting
- for prec in range(100):
- format = '%%.%if' % prec
- value = 0.01
- for x in range(60):
- value = value * 3.14159265359 / 3.0 * 10.0
- self.checkcall(format, "__mod__", value)
-
- def test_inplace_rewrites(self):
- # Check that strings don't copy and modify cached single-character strings
- self.checkequal('a', 'A', 'lower')
- self.checkequal(True, 'A', 'isupper')
- self.checkequal('A', 'a', 'upper')
- self.checkequal(True, 'a', 'islower')
-
- self.checkequal('a', 'A', 'replace', 'A', 'a')
- self.checkequal(True, 'A', 'isupper')
-
- self.checkequal('A', 'a', 'capitalize')
- self.checkequal(True, 'a', 'islower')
-
- self.checkequal('A', 'a', 'swapcase')
- self.checkequal(True, 'a', 'islower')
-
- self.checkequal('A', 'a', 'title')
- self.checkequal(True, 'a', 'islower')
-
- def test_partition(self):
-
- self.checkequal(('this is the par', 'ti', 'tion method'),
- 'this is the partition method', 'partition', 'ti')
-
- # from raymond's original specification
- S = 'http://www.python.org'
- self.checkequal(('http', '://', 'www.python.org'), S, 'partition', '://')
- self.checkequal(('http://www.python.org', '', ''), S, 'partition', '?')
- self.checkequal(('', 'http://', 'www.python.org'), S, 'partition', 'http://')
- self.checkequal(('http://www.python.', 'org', ''), S, 'partition', 'org')
-
- self.checkraises(ValueError, S, 'partition', '')
- self.checkraises(TypeError, S, 'partition', None)
-
- def test_rpartition(self):
-
- self.checkequal(('this is the rparti', 'ti', 'on method'),
- 'this is the rpartition method', 'rpartition', 'ti')
-
- # from raymond's original specification
- S = 'http://www.python.org'
- self.checkequal(('http', '://', 'www.python.org'), S, 'rpartition', '://')
- self.checkequal(('', '', 'http://www.python.org'), S, 'rpartition', '?')
- self.checkequal(('', 'http://', 'www.python.org'), S, 'rpartition', 'http://')
- self.checkequal(('http://www.python.', 'org', ''), S, 'rpartition', 'org')
-
- self.checkraises(ValueError, S, 'rpartition', '')
- self.checkraises(TypeError, S, 'rpartition', None)
-
- def test_none_arguments(self):
- # issue 11828
- s = 'hello'
- self.checkequal(2, s, 'find', 'l', None)
- self.checkequal(3, s, 'find', 'l', -2, None)
- self.checkequal(2, s, 'find', 'l', None, -2)
- self.checkequal(0, s, 'find', 'h', None, None)
-
- self.checkequal(3, s, 'rfind', 'l', None)
- self.checkequal(3, s, 'rfind', 'l', -2, None)
- self.checkequal(2, s, 'rfind', 'l', None, -2)
- self.checkequal(0, s, 'rfind', 'h', None, None)
-
- self.checkequal(2, s, 'index', 'l', None)
- self.checkequal(3, s, 'index', 'l', -2, None)
- self.checkequal(2, s, 'index', 'l', None, -2)
- self.checkequal(0, s, 'index', 'h', None, None)
-
- self.checkequal(3, s, 'rindex', 'l', None)
- self.checkequal(3, s, 'rindex', 'l', -2, None)
- self.checkequal(2, s, 'rindex', 'l', None, -2)
- self.checkequal(0, s, 'rindex', 'h', None, None)
-
- self.checkequal(2, s, 'count', 'l', None)
- self.checkequal(1, s, 'count', 'l', -2, None)
- self.checkequal(1, s, 'count', 'l', None, -2)
- self.checkequal(0, s, 'count', 'x', None, None)
-
- self.checkequal(True, s, 'endswith', 'o', None)
- self.checkequal(True, s, 'endswith', 'lo', -2, None)
- self.checkequal(True, s, 'endswith', 'l', None, -2)
- self.checkequal(False, s, 'endswith', 'x', None, None)
-
- self.checkequal(True, s, 'startswith', 'h', None)
- self.checkequal(True, s, 'startswith', 'l', -2, None)
- self.checkequal(True, s, 'startswith', 'h', None, -2)
- self.checkequal(False, s, 'startswith', 'x', None, None)
-
- def test_find_etc_raise_correct_error_messages(self):
- # issue 11828
- s = 'hello'
- x = 'x'
- self.assertRaisesRegex(TypeError, r'^find\(', s.find,
- x, None, None, None)
- self.assertRaisesRegex(TypeError, r'^rfind\(', s.rfind,
- x, None, None, None)
- self.assertRaisesRegex(TypeError, r'^index\(', s.index,
- x, None, None, None)
- self.assertRaisesRegex(TypeError, r'^rindex\(', s.rindex,
- x, None, None, None)
- self.assertRaisesRegex(TypeError, r'^count\(', s.count,
- x, None, None, None)
- self.assertRaisesRegex(TypeError, r'^startswith\(', s.startswith,
- x, None, None, None)
- self.assertRaisesRegex(TypeError, r'^endswith\(', s.endswith,
- x, None, None, None)
-
- # issue #15534
- self.checkequal(10, "...\u043c......<", "find", "<")
-
-
-class MixinStrUnicodeTest(object):
- # Additional tests that only work with str and unicode.
-
- def test_bug1001011(self):
- # Make sure join returns a NEW object for single item sequences
- # involving a subclass.
- # Make sure that it is of the appropriate type.
- # Check the optimisation still occurs for standard objects.
- t = self.type2test
- class subclass(t):
- pass
- s1 = subclass("abcd")
- s2 = t().join([s1])
- self.assertIsNot(s1, s2)
- self.assertIs(type(s2), t)
-
- s1 = t("abcd")
- s2 = t().join([s1])
- self.assertIs(s1, s2)
-
- # Should also test mixed-type join.
- if t is str:
- s1 = subclass("abcd")
- s2 = "".join([s1])
- self.assertIsNot(s1, s2)
- self.assertIs(type(s2), t)
-
- s1 = t("abcd")
- s2 = "".join([s1])
- self.assertIs(s1, s2)
-
-## elif t is str8:
-## s1 = subclass("abcd")
-## s2 = "".join([s1])
-## self.assertIsNot(s1, s2)
-## self.assertIs(type(s2), str) # promotes!
-
-## s1 = t("abcd")
-## s2 = "".join([s1])
-## self.assertIsNot(s1, s2)
-## self.assertIs(type(s2), str) # promotes!
-
- else:
- self.fail("unexpected type for MixinStrUnicodeTest %r" % t)
-
diff --git a/future/tests/base.py b/future/tests/base.py
deleted file mode 100644
index e430684f..00000000
--- a/future/tests/base.py
+++ /dev/null
@@ -1,278 +0,0 @@
-import os
-import tempfile
-import unittest
-if not hasattr(unittest, 'skip'):
- import unittest2 as unittest
-
-from textwrap import dedent
-import subprocess
-
-# For Python 2.6 compatibility: see http://stackoverflow.com/questions/4814970/
-if "check_output" not in dir(subprocess): # duck punch it in!
- def f(*popenargs, **kwargs):
- if 'stdout' in kwargs:
- raise ValueError('stdout argument not allowed, it will be overridden.')
- process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
- output, unused_err = process.communicate()
- retcode = process.poll()
- if retcode:
- cmd = kwargs.get("args")
- if cmd is None:
- cmd = popenargs[0]
- raise subprocess.CalledProcessError(retcode, cmd)
- return output
- subprocess.check_output = f
-
-class CodeHandler(unittest.TestCase):
- """
- Handy mixin for test classes for writing / reading / futurizing /
- running .py files in the test suite.
- """
- def setUp(self):
- """
- The outputs from the various futurize stages should have the
- following headers:
- """
- # After stage1:
- # TODO: use this form after implementing a fixer to consolidate
- # __future__ imports into a single line:
- # self.headers1 = """
- # from __future__ import absolute_import, division, print_function
- # """
- self.headers1 = self.reformat("""
- from __future__ import absolute_import
- from __future__ import division
- from __future__ import print_function
- """)
-
- # After stage2:
- # TODO: use this form after implementing a fixer to consolidate
- # __future__ imports into a single line:
- # self.headers2 = """
- # from __future__ import (absolute_import, division,
- # print_function, unicode_literals)
- # from future import standard_library
- # from future.builtins import *
- # """
- self.headers2 = self.reformat("""
- from __future__ import absolute_import
- from __future__ import division
- from __future__ import print_function
- from __future__ import unicode_literals
- from future import standard_library
- from future.builtins import *
- """)
- self.interpreters = ['python']
- self.tempdir = tempfile.mkdtemp() + os.path.sep
- self.env = {'PYTHONPATH': os.getcwd()}
-
- def convert(self, code, stages=(1, 2), all_imports=False, from3=False,
- reformat=True, tobytes=True, run=True):
- """
- Converts the code block using ``futurize`` and returns the
- resulting code.
-
- Passing stages=[1] or stages=[2] passes the flag ``--stage1`` or
- ``stage2`` to ``futurize``. Passing both stages runs ``futurize``
- with both stages by default.
-
- If from3 is False, runs ``futurize`` in the default mode,
- converting from Python 2 to both 2 and 3. If from3 is True, runs
- ``futurize --from3`` to convert from Python 3 to both 2 and 3.
-
- Optionally reformats the code block first using the reformat()
- method.
-
- If run is True, runs the resulting code under all Python
- interpreters in self.interpreters.
- """
- if reformat:
- code = self.reformat(code)
- self._write_test_script(code)
- self._futurize_test_script(stages=stages, all_imports=all_imports,
- from3=from3, tobytes=tobytes)
- output = self._read_test_script()
- if run:
- for interpreter in self.interpreters:
- _ = self._run_test_script(interpreter=interpreter)
- return output
-
- def reformat(self, code):
- """
- Removes any leading \n and dedents.
- """
- if code.startswith('\n'):
- code = code[1:]
- return dedent(code)
-
- def check(self, output, expected, ignore_imports=True):
- """
- Compares whether the code blocks are equal. If not, raises an
- exception so the test fails. Ignores any trailing whitespace like
- blank lines.
-
- If ignore_imports is True, passes the code blocks into the
- strip_future_imports method.
- """
- # self.assertEqual(expected.rstrip(),
- # self.order_future_lines(output).rstrip())
- if ignore_imports:
- output = self.strip_future_imports(output)
- expected = self.strip_future_imports(expected)
- self.assertEqual(self.order_future_lines(output.rstrip()),
- expected.rstrip())
-
- def strip_future_imports(self, code):
- """
- Strips any of these import lines:
-
- from __future__ import
- from future
- from future.
-
- Limitation: doesn't handle imports split across multiple lines like
- this:
-
- from __future__ import (absolute_import, division, print_function,
- unicode_literals)
- """
- output = []
- for line in code.splitlines():
- if not (line.startswith('from __future__ import ')
- or line.startswith('from future ')
- # but don't match "from future_builtins" :)
- or line.startswith('from future.')):
- output.append(line)
- return '\n'.join(output)
-
- def convert_check(self, before, expected, stages=(1, 2),
- all_imports=False, ignore_imports=True, from3=False,
- tobytes=False, run=True):
- """
- Convenience method that calls convert() and check().
-
- Reformats the code blocks automatically using the reformat()
- method.
-
- If all_imports is passed, we add the appropriate import headers
- for the stage(s) selected to the ``expected`` code-block, so they
- needn't appear repeatedly in the test code.
-
- If ignore_imports is True, ignores the presence of any lines
- beginning:
-
- from __future__ import ...
- from future import ...
-
- for the purpose of the comparison.
- """
- output = self.convert(before, stages=stages,
- all_imports=all_imports, from3=from3,
- tobytes=tobytes, run=run)
- if all_imports:
- headers = self.headers2 if 2 in stages else self.headers1
- else:
- headers = ''
-
- self.check(output, self.reformat(headers + expected),
- ignore_imports=ignore_imports)
-
- def check_old(self, output, expected, stages=(1, 2), ignore_imports=True):
- """
- Checks that the output is equal to the expected output, after
- reformatting.
-
- Pass ``expected`` as a string (as a code block). It will be
- reformatted and compared with the resulting code. We assert that
- the output of the conversion of ``before`` with ``futurize`` is
- equal to ``after``. Unless ignore_imports is True, the
- appropriate headers for the stage(s) used are added automatically
- for the comparison.
- """
- headers = ''
- # if not ignore_imports:
- # if 2 in stages:
- # headers = self.headers2
- # else:
- # headers = self.headers1
- self.compare(output, headers + self.reformat(expected),
- ignore_imports=ignore_imports)
-
- def order_future_lines(self, code):
- """
- TODO: simplify this hideous code ...
-
- Returns the code block with any ``__future__`` import lines sorted, and
- then any ``future`` import lines sorted.
- """
- codelines = code.splitlines()
- # Under under future lines:
- uufuture_line_numbers = [i for i in range(len(codelines)) if codelines[i].startswith('from __future__ import ')]
- sorted_uufuture_lines = sorted([codelines[i] for i in uufuture_line_numbers])
-
- # future import lines:
- future_line_numbers = [i for i in range(len(codelines)) if codelines[i].startswith('from future')]
- sorted_future_lines = sorted([codelines[i] for i in future_line_numbers])
-
- # Replace the old unsorted "from __future__ import ..." lines with the
- # new sorted ones:
- codelines2 = []
- for i in range(len(codelines)):
- if i in uufuture_line_numbers:
- codelines2.append(sorted_uufuture_lines[i])
- elif i in future_line_numbers:
- codelines2.append(sorted_future_lines[i - len(uufuture_line_numbers)])
- else:
- codelines2.append(codelines[i])
- return '\n'.join(codelines2)
-
- def unchanged(self, code, **kwargs):
- """
- Convenience method to ensure the code is unchanged by the
- futurize process.
- """
- self.convert_check(code, code, **kwargs)
-
- def _write_test_script(self, code, filename='mytestscript.py'):
- """
- Dedents the given code (a multiline string) and writes it out to
- a file in a temporary folder like /tmp/tmpUDCn7x/mytestscript.py.
- """
- with open(self.tempdir + filename, 'w') as f:
- f.write(dedent(code))
-
- def _read_test_script(self, filename='mytestscript.py'):
- with open(self.tempdir + filename) as f:
- newsource = f.read()
- return newsource
-
- def _futurize_test_script(self, filename='mytestscript.py', stages=(1, 2),
- all_imports=False, from3=False, tobytes=False):
- params = []
- stages = list(stages)
- if all_imports:
- params.append('--all-imports')
- if from3:
- params.append('--from3')
- if tobytes:
- params.append('--tobytes')
- if stages == [1]:
- params.append('--stage1')
- elif stages == [2]:
- params.append('--stage2')
- else:
- assert stages == [1, 2]
- # No extra params needed
-
- output = subprocess.check_output(['python', 'futurize.py'] + params +
- ['-w', self.tempdir + filename],
- stderr=subprocess.STDOUT)
- return output
-
- def _run_test_script(self, filename='mytestscript.py',
- interpreter='python'):
- env = {'PYTHONPATH': os.getcwd()}
- return subprocess.check_output([interpreter, self.tempdir + filename],
- env=env)
-
-
diff --git a/future/tests/disabled/test_bytes_from_py33.py b/future/tests/disabled/test_bytes_from_py33.py
deleted file mode 100644
index 329e99da..00000000
--- a/future/tests/disabled/test_bytes_from_py33.py
+++ /dev/null
@@ -1,1425 +0,0 @@
-"""Unit tests for the bytes and bytearray types.
-
-XXX This is a mess. Common tests should be moved to buffer_tests.py,
-which itself ought to be unified with string_tests.py (and the latter
-should be modernized).
-"""
-from __future__ import (absolute_import, division,
- print_function, unicode_literals)
-from future import standard_library
-from future.builtins import *
-
-import os
-import re
-import sys
-import copy
-import functools
-import pickle
-import tempfile
-import unittest
-
-# Some Python installations (e.g. travis-ci.org Py2.7 and Py3.3) are
-# missing the Python test suite (and there's no Ubuntu 12.04 package to
-# install it), although a skeleton test package still exists with
-# a couple of modules like test.support. We probably don't want to
-# install standard library hooks ever on Py3, so the two imports below
-# will fail. In this case, just exit. No probem: on Py3 we don't redefine
-# bytes anyway.
-try:
- import test.support
- import test.string_tests
- import test.buffer_tests
-except ImportError:
- from future.utils import PY3
- if PY3:
- sys.exit(0)
- else:
- raise
-
-if sys.flags.bytes_warning:
- def check_bytes_warnings(func):
- @functools.wraps(func)
- def wrapper(*args, **kw):
- with test.support.check_warnings(('', BytesWarning)):
- return func(*args, **kw)
- return wrapper
-else:
- # no-op
- def check_bytes_warnings(func):
- return func
-
-
-class Indexable:
- def __init__(self, value=0):
- self.value = value
- def __index__(self):
- return self.value
-
-
-class BaseBytesTest:
-
- def test_basics(self):
- b = self.type2test()
- self.assertEqual(type(b), self.type2test)
- self.assertEqual(b.__class__, self.type2test)
-
- def test_copy(self):
- a = self.type2test(b"abcd")
- for copy_method in (copy.copy, copy.deepcopy):
- b = copy_method(a)
- self.assertEqual(a, b)
- self.assertEqual(type(a), type(b))
-
- def test_empty_sequence(self):
- b = self.type2test()
- self.assertEqual(len(b), 0)
- self.assertRaises(IndexError, lambda: b[0])
- self.assertRaises(IndexError, lambda: b[1])
- self.assertRaises(IndexError, lambda: b[sys.maxsize])
- self.assertRaises(IndexError, lambda: b[sys.maxsize+1])
- self.assertRaises(IndexError, lambda: b[10**100])
- self.assertRaises(IndexError, lambda: b[-1])
- self.assertRaises(IndexError, lambda: b[-2])
- self.assertRaises(IndexError, lambda: b[-sys.maxsize])
- self.assertRaises(IndexError, lambda: b[-sys.maxsize-1])
- self.assertRaises(IndexError, lambda: b[-sys.maxsize-2])
- self.assertRaises(IndexError, lambda: b[-10**100])
-
- def test_from_list(self):
- ints = list(range(256))
- b = self.type2test(i for i in ints)
- self.assertEqual(len(b), 256)
- self.assertEqual(list(b), ints)
-
- def test_from_index(self):
- b = self.type2test([Indexable(), Indexable(1), Indexable(254),
- Indexable(255)])
- self.assertEqual(list(b), [0, 1, 254, 255])
- self.assertRaises(ValueError, self.type2test, [Indexable(-1)])
- self.assertRaises(ValueError, self.type2test, [Indexable(256)])
-
- def test_from_ssize(self):
- self.assertEqual(self.type2test(0), b'')
- self.assertEqual(self.type2test(1), b'\x00')
- self.assertEqual(self.type2test(5), b'\x00\x00\x00\x00\x00')
- self.assertRaises(ValueError, self.type2test, -1)
-
- self.assertEqual(self.type2test('0', 'ascii'), b'0')
- self.assertEqual(self.type2test(b'0'), b'0')
- self.assertRaises(OverflowError, self.type2test, sys.maxsize + 1)
-
- def test_constructor_type_errors(self):
- self.assertRaises(TypeError, self.type2test, 0.0)
- class C:
- pass
- self.assertRaises(TypeError, self.type2test, ["0"])
- self.assertRaises(TypeError, self.type2test, [0.0])
- self.assertRaises(TypeError, self.type2test, [None])
- self.assertRaises(TypeError, self.type2test, [C()])
-
- def test_constructor_value_errors(self):
- self.assertRaises(ValueError, self.type2test, [-1])
- self.assertRaises(ValueError, self.type2test, [-sys.maxsize])
- self.assertRaises(ValueError, self.type2test, [-sys.maxsize-1])
- self.assertRaises(ValueError, self.type2test, [-sys.maxsize-2])
- self.assertRaises(ValueError, self.type2test, [-10**100])
- self.assertRaises(ValueError, self.type2test, [256])
- self.assertRaises(ValueError, self.type2test, [257])
- self.assertRaises(ValueError, self.type2test, [sys.maxsize])
- self.assertRaises(ValueError, self.type2test, [sys.maxsize+1])
- self.assertRaises(ValueError, self.type2test, [10**100])
-
- def test_compare(self):
- b1 = self.type2test([1, 2, 3])
- b2 = self.type2test([1, 2, 3])
- b3 = self.type2test([1, 3])
-
- self.assertEqual(b1, b2)
- self.assertTrue(b2 != b3)
- self.assertTrue(b1 <= b2)
- self.assertTrue(b1 <= b3)
- self.assertTrue(b1 < b3)
- self.assertTrue(b1 >= b2)
- self.assertTrue(b3 >= b2)
- self.assertTrue(b3 > b2)
-
- self.assertFalse(b1 != b2)
- self.assertFalse(b2 == b3)
- self.assertFalse(b1 > b2)
- self.assertFalse(b1 > b3)
- self.assertFalse(b1 >= b3)
- self.assertFalse(b1 < b2)
- self.assertFalse(b3 < b2)
- self.assertFalse(b3 <= b2)
-
- @check_bytes_warnings
- def test_compare_to_str(self):
- # Byte comparisons with unicode should always fail!
- # Test this for all expected byte orders and Unicode character
- # sizes.
- self.assertEqual(self.type2test(b"\0a\0b\0c") == "abc", False)
- self.assertEqual(self.type2test(b"\0\0\0a\0\0\0b\0\0\0c") == "abc",
- False)
- self.assertEqual(self.type2test(b"a\0b\0c\0") == "abc", False)
- self.assertEqual(self.type2test(b"a\0\0\0b\0\0\0c\0\0\0") == "abc",
- False)
- self.assertEqual(self.type2test() == str(), False)
- self.assertEqual(self.type2test() != str(), True)
-
- def test_reversed(self):
- input = list(map(ord, "Hello"))
- b = self.type2test(input)
- output = list(reversed(b))
- input.reverse()
- self.assertEqual(output, input)
-
- def test_getslice(self):
- def by(s):
- return self.type2test(map(ord, s))
- b = by("Hello, world")
-
- self.assertEqual(b[:5], by("Hello"))
- self.assertEqual(b[1:5], by("ello"))
- self.assertEqual(b[5:7], by(", "))
- self.assertEqual(b[7:], by("world"))
- self.assertEqual(b[7:12], by("world"))
- self.assertEqual(b[7:100], by("world"))
-
- self.assertEqual(b[:-7], by("Hello"))
- self.assertEqual(b[-11:-7], by("ello"))
- self.assertEqual(b[-7:-5], by(", "))
- self.assertEqual(b[-5:], by("world"))
- self.assertEqual(b[-5:12], by("world"))
- self.assertEqual(b[-5:100], by("world"))
- self.assertEqual(b[-100:5], by("Hello"))
-
- def test_extended_getslice(self):
- # Test extended slicing by comparing with list slicing.
- L = list(range(255))
- b = self.type2test(L)
- indices = (0, None, 1, 3, 19, 100, -1, -2, -31, -100)
- for start in indices:
- for stop in indices:
- # Skip step 0 (invalid)
- for step in indices[1:]:
- self.assertEqual(b[start:stop:step], self.type2test(L[start:stop:step]))
-
- def test_encoding(self):
- sample = "Hello world\n\u1234\u5678\u9abc"
- for enc in ("utf-8", "utf-16"):
- b = self.type2test(sample, enc)
- self.assertEqual(b, self.type2test(sample.encode(enc)))
- self.assertRaises(UnicodeEncodeError, self.type2test, sample, "latin-1")
- b = self.type2test(sample, "latin-1", "ignore")
- self.assertEqual(b, self.type2test(sample[:-3], "utf-8"))
-
- def test_decode(self):
- sample = "Hello world\n\u1234\u5678\u9abc\def0\def0"
- for enc in ("utf-8", "utf-16"):
- b = self.type2test(sample, enc)
- self.assertEqual(b.decode(enc), sample)
- sample = "Hello world\n\x80\x81\xfe\xff"
- b = self.type2test(sample, "latin-1")
- self.assertRaises(UnicodeDecodeError, b.decode, "utf-8")
- self.assertEqual(b.decode("utf-8", "ignore"), "Hello world\n")
- self.assertEqual(b.decode(errors="ignore", encoding="utf-8"),
- "Hello world\n")
- # Default encoding is utf-8
- self.assertEqual(self.type2test(b'\xe2\x98\x83').decode(), '\u2603')
-
- def test_from_int(self):
- b = self.type2test(0)
- self.assertEqual(b, self.type2test())
- b = self.type2test(10)
- self.assertEqual(b, self.type2test([0]*10))
- b = self.type2test(10000)
- self.assertEqual(b, self.type2test([0]*10000))
-
- def test_concat(self):
- b1 = self.type2test(b"abc")
- b2 = self.type2test(b"def")
- self.assertEqual(b1 + b2, b"abcdef")
- self.assertEqual(b1 + bytes(b"def"), b"abcdef")
- self.assertEqual(bytes(b"def") + b1, b"defabc")
- self.assertRaises(TypeError, lambda: b1 + "def")
- self.assertRaises(TypeError, lambda: "abc" + b2)
-
- def test_repeat(self):
- for b in b"abc", self.type2test(b"abc"):
- self.assertEqual(b * 3, b"abcabcabc")
- self.assertEqual(b * 0, b"")
- self.assertEqual(b * -1, b"")
- self.assertRaises(TypeError, lambda: b * 3.14)
- self.assertRaises(TypeError, lambda: 3.14 * b)
- # XXX Shouldn't bytes and bytearray agree on what to raise?
- with self.assertRaises((OverflowError, MemoryError)):
- c = b * sys.maxsize
- with self.assertRaises((OverflowError, MemoryError)):
- b *= sys.maxsize
-
- def test_repeat_1char(self):
- self.assertEqual(self.type2test(b'x')*100, self.type2test([ord('x')]*100))
-
- def test_contains(self):
- b = self.type2test(b"abc")
- self.assertIn(ord('a'), b)
- self.assertIn(int(ord('a')), b)
- self.assertNotIn(200, b)
- self.assertRaises(ValueError, lambda: 300 in b)
- self.assertRaises(ValueError, lambda: -1 in b)
- self.assertRaises(TypeError, lambda: None in b)
- self.assertRaises(TypeError, lambda: float(ord('a')) in b)
- self.assertRaises(TypeError, lambda: "a" in b)
- for f in bytes, bytearray:
- self.assertIn(f(b""), b)
- self.assertIn(f(b"a"), b)
- self.assertIn(f(b"b"), b)
- self.assertIn(f(b"c"), b)
- self.assertIn(f(b"ab"), b)
- self.assertIn(f(b"bc"), b)
- self.assertIn(f(b"abc"), b)
- self.assertNotIn(f(b"ac"), b)
- self.assertNotIn(f(b"d"), b)
- self.assertNotIn(f(b"dab"), b)
- self.assertNotIn(f(b"abd"), b)
-
- def test_fromhex(self):
- self.assertRaises(TypeError, self.type2test.fromhex)
- self.assertRaises(TypeError, self.type2test.fromhex, 1)
- self.assertEqual(self.type2test.fromhex(''), self.type2test())
- b = bytearray([0x1a, 0x2b, 0x30])
- self.assertEqual(self.type2test.fromhex('1a2B30'), b)
- self.assertEqual(self.type2test.fromhex(' 1A 2B 30 '), b)
- self.assertEqual(self.type2test.fromhex('0000'), b'\0\0')
- self.assertRaises(TypeError, self.type2test.fromhex, b'1B')
- self.assertRaises(ValueError, self.type2test.fromhex, 'a')
- self.assertRaises(ValueError, self.type2test.fromhex, 'rt')
- self.assertRaises(ValueError, self.type2test.fromhex, '1a b cd')
- self.assertRaises(ValueError, self.type2test.fromhex, '\x00')
- self.assertRaises(ValueError, self.type2test.fromhex, '12 \x00 34')
-
- def test_join(self):
- self.assertEqual(self.type2test(b"").join([]), b"")
- self.assertEqual(self.type2test(b"").join([b""]), b"")
- for lst in [[b"abc"], [b"a", b"bc"], [b"ab", b"c"], [b"a", b"b", b"c"]]:
- lst = list(map(self.type2test, lst))
- self.assertEqual(self.type2test(b"").join(lst), b"abc")
- self.assertEqual(self.type2test(b"").join(tuple(lst)), b"abc")
- self.assertEqual(self.type2test(b"").join(iter(lst)), b"abc")
- self.assertEqual(self.type2test(b".").join([b"ab", b"cd"]), b"ab.cd")
- # XXX more...
-
- def test_count(self):
- b = self.type2test(b'mississippi')
- i = 105
- p = 112
- w = 119
-
- self.assertEqual(b.count(b'i'), 4)
- self.assertEqual(b.count(b'ss'), 2)
- self.assertEqual(b.count(b'w'), 0)
-
- self.assertEqual(b.count(i), 4)
- self.assertEqual(b.count(w), 0)
-
- self.assertEqual(b.count(b'i', 6), 2)
- self.assertEqual(b.count(b'p', 6), 2)
- self.assertEqual(b.count(b'i', 1, 3), 1)
- self.assertEqual(b.count(b'p', 7, 9), 1)
-
- self.assertEqual(b.count(i, 6), 2)
- self.assertEqual(b.count(p, 6), 2)
- self.assertEqual(b.count(i, 1, 3), 1)
- self.assertEqual(b.count(p, 7, 9), 1)
-
- def test_startswith(self):
- b = self.type2test(b'hello')
- self.assertFalse(self.type2test().startswith(b"anything"))
- self.assertTrue(b.startswith(b"hello"))
- self.assertTrue(b.startswith(b"hel"))
- self.assertTrue(b.startswith(b"h"))
- self.assertFalse(b.startswith(b"hellow"))
- self.assertFalse(b.startswith(b"ha"))
- with self.assertRaises(TypeError) as cm:
- b.startswith([b'h'])
- exc = str(cm.exception)
- self.assertIn('bytes', exc)
- self.assertIn('tuple', exc)
-
- def test_endswith(self):
- b = self.type2test(b'hello')
- self.assertFalse(bytearray().endswith(b"anything"))
- self.assertTrue(b.endswith(b"hello"))
- self.assertTrue(b.endswith(b"llo"))
- self.assertTrue(b.endswith(b"o"))
- self.assertFalse(b.endswith(b"whello"))
- self.assertFalse(b.endswith(b"no"))
- with self.assertRaises(TypeError) as cm:
- b.endswith([b'o'])
- exc = str(cm.exception)
- self.assertIn('bytes', exc)
- self.assertIn('tuple', exc)
-
- def test_find(self):
- b = self.type2test(b'mississippi')
- i = 105
- w = 119
-
- self.assertEqual(b.find(b'ss'), 2)
- self.assertEqual(b.find(b'w'), -1)
- self.assertEqual(b.find(b'mississippian'), -1)
-
- self.assertEqual(b.find(i), 1)
- self.assertEqual(b.find(w), -1)
-
- self.assertEqual(b.find(b'ss', 3), 5)
- self.assertEqual(b.find(b'ss', 1, 7), 2)
- self.assertEqual(b.find(b'ss', 1, 3), -1)
-
- self.assertEqual(b.find(i, 6), 7)
- self.assertEqual(b.find(i, 1, 3), 1)
- self.assertEqual(b.find(w, 1, 3), -1)
-
- for index in (-1, 256, sys.maxsize + 1):
- self.assertRaisesRegex(
- ValueError, r'byte must be in range\(0, 256\)',
- b.find, index)
-
- def test_rfind(self):
- b = self.type2test(b'mississippi')
- i = 105
- w = 119
-
- self.assertEqual(b.rfind(b'ss'), 5)
- self.assertEqual(b.rfind(b'w'), -1)
- self.assertEqual(b.rfind(b'mississippian'), -1)
-
- self.assertEqual(b.rfind(i), 10)
- self.assertEqual(b.rfind(w), -1)
-
- self.assertEqual(b.rfind(b'ss', 3), 5)
- self.assertEqual(b.rfind(b'ss', 0, 6), 2)
-
- self.assertEqual(b.rfind(i, 1, 3), 1)
- self.assertEqual(b.rfind(i, 3, 9), 7)
- self.assertEqual(b.rfind(w, 1, 3), -1)
-
- def test_index(self):
- b = self.type2test(b'mississippi')
- i = 105
- w = 119
-
- self.assertEqual(b.index(b'ss'), 2)
- self.assertRaises(ValueError, b.index, b'w')
- self.assertRaises(ValueError, b.index, b'mississippian')
-
- self.assertEqual(b.index(i), 1)
- self.assertRaises(ValueError, b.index, w)
-
- self.assertEqual(b.index(b'ss', 3), 5)
- self.assertEqual(b.index(b'ss', 1, 7), 2)
- self.assertRaises(ValueError, b.index, b'ss', 1, 3)
-
- self.assertEqual(b.index(i, 6), 7)
- self.assertEqual(b.index(i, 1, 3), 1)
- self.assertRaises(ValueError, b.index, w, 1, 3)
-
- def test_rindex(self):
- b = self.type2test(b'mississippi')
- i = 105
- w = 119
-
- self.assertEqual(b.rindex(b'ss'), 5)
- self.assertRaises(ValueError, b.rindex, b'w')
- self.assertRaises(ValueError, b.rindex, b'mississippian')
-
- self.assertEqual(b.rindex(i), 10)
- self.assertRaises(ValueError, b.rindex, w)
-
- self.assertEqual(b.rindex(b'ss', 3), 5)
- self.assertEqual(b.rindex(b'ss', 0, 6), 2)
-
- self.assertEqual(b.rindex(i, 1, 3), 1)
- self.assertEqual(b.rindex(i, 3, 9), 7)
- self.assertRaises(ValueError, b.rindex, w, 1, 3)
-
- def test_replace(self):
- b = self.type2test(b'mississippi')
- self.assertEqual(b.replace(b'i', b'a'), b'massassappa')
- self.assertEqual(b.replace(b'ss', b'x'), b'mixixippi')
-
- def test_split(self):
- b = self.type2test(b'mississippi')
- self.assertEqual(b.split(b'i'), [b'm', b'ss', b'ss', b'pp', b''])
- self.assertEqual(b.split(b'ss'), [b'mi', b'i', b'ippi'])
- self.assertEqual(b.split(b'w'), [b])
- # with keyword args
- b = self.type2test(b'a|b|c|d')
- self.assertEqual(b.split(sep=b'|'), [b'a', b'b', b'c', b'd'])
- self.assertEqual(b.split(b'|', maxsplit=1), [b'a', b'b|c|d'])
- self.assertEqual(b.split(sep=b'|', maxsplit=1), [b'a', b'b|c|d'])
- self.assertEqual(b.split(maxsplit=1, sep=b'|'), [b'a', b'b|c|d'])
- b = self.type2test(b'a b c d')
- self.assertEqual(b.split(maxsplit=1), [b'a', b'b c d'])
-
- def test_split_whitespace(self):
- for b in (b' arf barf ', b'arf\tbarf', b'arf\nbarf', b'arf\rbarf',
- b'arf\fbarf', b'arf\vbarf'):
- b = self.type2test(b)
- self.assertEqual(b.split(), [b'arf', b'barf'])
- self.assertEqual(b.split(None), [b'arf', b'barf'])
- self.assertEqual(b.split(None, 2), [b'arf', b'barf'])
- for b in (b'a\x1Cb', b'a\x1Db', b'a\x1Eb', b'a\x1Fb'):
- b = self.type2test(b)
- self.assertEqual(b.split(), [b])
- self.assertEqual(self.type2test(b' a bb c ').split(None, 0), [b'a bb c '])
- self.assertEqual(self.type2test(b' a bb c ').split(None, 1), [b'a', b'bb c '])
- self.assertEqual(self.type2test(b' a bb c ').split(None, 2), [b'a', b'bb', b'c '])
- self.assertEqual(self.type2test(b' a bb c ').split(None, 3), [b'a', b'bb', b'c'])
-
- def test_split_string_error(self):
- self.assertRaises(TypeError, self.type2test(b'a b').split, ' ')
-
- def test_split_unicodewhitespace(self):
- b = self.type2test(b"\x09\x0A\x0B\x0C\x0D\x1C\x1D\x1E\x1F")
- self.assertEqual(b.split(), [b'\x1c\x1d\x1e\x1f'])
-
- def test_rsplit(self):
- b = self.type2test(b'mississippi')
- self.assertEqual(b.rsplit(b'i'), [b'm', b'ss', b'ss', b'pp', b''])
- self.assertEqual(b.rsplit(b'ss'), [b'mi', b'i', b'ippi'])
- self.assertEqual(b.rsplit(b'w'), [b])
- # with keyword args
- b = self.type2test(b'a|b|c|d')
- self.assertEqual(b.rsplit(sep=b'|'), [b'a', b'b', b'c', b'd'])
- self.assertEqual(b.rsplit(b'|', maxsplit=1), [b'a|b|c', b'd'])
- self.assertEqual(b.rsplit(sep=b'|', maxsplit=1), [b'a|b|c', b'd'])
- self.assertEqual(b.rsplit(maxsplit=1, sep=b'|'), [b'a|b|c', b'd'])
- b = self.type2test(b'a b c d')
- self.assertEqual(b.rsplit(maxsplit=1), [b'a b c', b'd'])
-
- def test_rsplit_whitespace(self):
- for b in (b' arf barf ', b'arf\tbarf', b'arf\nbarf', b'arf\rbarf',
- b'arf\fbarf', b'arf\vbarf'):
- b = self.type2test(b)
- self.assertEqual(b.rsplit(), [b'arf', b'barf'])
- self.assertEqual(b.rsplit(None), [b'arf', b'barf'])
- self.assertEqual(b.rsplit(None, 2), [b'arf', b'barf'])
- self.assertEqual(self.type2test(b' a bb c ').rsplit(None, 0), [b' a bb c'])
- self.assertEqual(self.type2test(b' a bb c ').rsplit(None, 1), [b' a bb', b'c'])
- self.assertEqual(self.type2test(b' a bb c ').rsplit(None, 2), [b' a', b'bb', b'c'])
- self.assertEqual(self.type2test(b' a bb c ').rsplit(None, 3), [b'a', b'bb', b'c'])
-
- def test_rsplit_string_error(self):
- self.assertRaises(TypeError, self.type2test(b'a b').rsplit, ' ')
-
- def test_rsplit_unicodewhitespace(self):
- b = self.type2test(b"\x09\x0A\x0B\x0C\x0D\x1C\x1D\x1E\x1F")
- self.assertEqual(b.rsplit(), [b'\x1c\x1d\x1e\x1f'])
-
- def test_partition(self):
- b = self.type2test(b'mississippi')
- self.assertEqual(b.partition(b'ss'), (b'mi', b'ss', b'issippi'))
- self.assertEqual(b.partition(b'w'), (b'mississippi', b'', b''))
-
- def test_rpartition(self):
- b = self.type2test(b'mississippi')
- self.assertEqual(b.rpartition(b'ss'), (b'missi', b'ss', b'ippi'))
- self.assertEqual(b.rpartition(b'i'), (b'mississipp', b'i', b''))
- self.assertEqual(b.rpartition(b'w'), (b'', b'', b'mississippi'))
-
- def test_pickling(self):
- for proto in range(pickle.HIGHEST_PROTOCOL + 1):
- for b in b"", b"a", b"abc", b"\xffab\x80", b"\0\0\377\0\0":
- b = self.type2test(b)
- ps = pickle.dumps(b, proto)
- q = pickle.loads(ps)
- self.assertEqual(b, q)
-
- def test_iterator_pickling(self):
- for b in b"", b"a", b"abc", b"\xffab\x80", b"\0\0\377\0\0":
- it = itorg = iter(self.type2test(b))
- data = list(self.type2test(b))
- d = pickle.dumps(it)
- it = pickle.loads(d)
- self.assertEqual(type(itorg), type(it))
- self.assertEqual(list(it), data)
-
- it = pickle.loads(d)
- try:
- next(it)
- except StopIteration:
- continue
- d = pickle.dumps(it)
- it = pickle.loads(d)
- self.assertEqual(list(it), data[1:])
-
- def test_strip(self):
- b = self.type2test(b'mississippi')
- self.assertEqual(b.strip(b'i'), b'mississipp')
- self.assertEqual(b.strip(b'm'), b'ississippi')
- self.assertEqual(b.strip(b'pi'), b'mississ')
- self.assertEqual(b.strip(b'im'), b'ssissipp')
- self.assertEqual(b.strip(b'pim'), b'ssiss')
- self.assertEqual(b.strip(b), b'')
-
- def test_lstrip(self):
- b = self.type2test(b'mississippi')
- self.assertEqual(b.lstrip(b'i'), b'mississippi')
- self.assertEqual(b.lstrip(b'm'), b'ississippi')
- self.assertEqual(b.lstrip(b'pi'), b'mississippi')
- self.assertEqual(b.lstrip(b'im'), b'ssissippi')
- self.assertEqual(b.lstrip(b'pim'), b'ssissippi')
-
- def test_rstrip(self):
- b = self.type2test(b'mississippi')
- self.assertEqual(b.rstrip(b'i'), b'mississipp')
- self.assertEqual(b.rstrip(b'm'), b'mississippi')
- self.assertEqual(b.rstrip(b'pi'), b'mississ')
- self.assertEqual(b.rstrip(b'im'), b'mississipp')
- self.assertEqual(b.rstrip(b'pim'), b'mississ')
-
- def test_strip_whitespace(self):
- b = self.type2test(b' \t\n\r\f\vabc \t\n\r\f\v')
- self.assertEqual(b.strip(), b'abc')
- self.assertEqual(b.lstrip(), b'abc \t\n\r\f\v')
- self.assertEqual(b.rstrip(), b' \t\n\r\f\vabc')
-
- def test_strip_bytearray(self):
- self.assertEqual(self.type2test(b'abc').strip(memoryview(b'ac')), b'b')
- self.assertEqual(self.type2test(b'abc').lstrip(memoryview(b'ac')), b'bc')
- self.assertEqual(self.type2test(b'abc').rstrip(memoryview(b'ac')), b'ab')
-
- def test_strip_string_error(self):
- self.assertRaises(TypeError, self.type2test(b'abc').strip, 'b')
- self.assertRaises(TypeError, self.type2test(b'abc').lstrip, 'b')
- self.assertRaises(TypeError, self.type2test(b'abc').rstrip, 'b')
-
- def test_center(self):
- # Fill character can be either bytes or bytearray (issue 12380)
- b = self.type2test(b'abc')
- for fill_type in (bytes, bytearray):
- self.assertEqual(b.center(7, fill_type(b'-')),
- self.type2test(b'--abc--'))
-
- def test_ljust(self):
- # Fill character can be either bytes or bytearray (issue 12380)
- b = self.type2test(b'abc')
- for fill_type in (bytes, bytearray):
- self.assertEqual(b.ljust(7, fill_type(b'-')),
- self.type2test(b'abc----'))
-
- def test_rjust(self):
- # Fill character can be either bytes or bytearray (issue 12380)
- b = self.type2test(b'abc')
- for fill_type in (bytes, bytearray):
- self.assertEqual(b.rjust(7, fill_type(b'-')),
- self.type2test(b'----abc'))
-
- def test_ord(self):
- b = self.type2test(b'\0A\x7f\x80\xff')
- self.assertEqual([ord(b[i:i+1]) for i in range(len(b))],
- [0, 65, 127, 128, 255])
-
- def test_maketrans(self):
- transtable = b'\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
- self.assertEqual(self.type2test.maketrans(b'abc', b'xyz'), transtable)
- transtable = b'\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374xyz'
- self.assertEqual(self.type2test.maketrans(b'\375\376\377', b'xyz'), transtable)
- self.assertRaises(ValueError, self.type2test.maketrans, b'abc', b'xyzq')
- self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 'def')
-
- def test_none_arguments(self):
- # issue 11828
- b = self.type2test(b'hello')
- l = self.type2test(b'l')
- h = self.type2test(b'h')
- x = self.type2test(b'x')
- o = self.type2test(b'o')
-
- self.assertEqual(2, b.find(l, None))
- self.assertEqual(3, b.find(l, -2, None))
- self.assertEqual(2, b.find(l, None, -2))
- self.assertEqual(0, b.find(h, None, None))
-
- self.assertEqual(3, b.rfind(l, None))
- self.assertEqual(3, b.rfind(l, -2, None))
- self.assertEqual(2, b.rfind(l, None, -2))
- self.assertEqual(0, b.rfind(h, None, None))
-
- self.assertEqual(2, b.index(l, None))
- self.assertEqual(3, b.index(l, -2, None))
- self.assertEqual(2, b.index(l, None, -2))
- self.assertEqual(0, b.index(h, None, None))
-
- self.assertEqual(3, b.rindex(l, None))
- self.assertEqual(3, b.rindex(l, -2, None))
- self.assertEqual(2, b.rindex(l, None, -2))
- self.assertEqual(0, b.rindex(h, None, None))
-
- self.assertEqual(2, b.count(l, None))
- self.assertEqual(1, b.count(l, -2, None))
- self.assertEqual(1, b.count(l, None, -2))
- self.assertEqual(0, b.count(x, None, None))
-
- self.assertEqual(True, b.endswith(o, None))
- self.assertEqual(True, b.endswith(o, -2, None))
- self.assertEqual(True, b.endswith(l, None, -2))
- self.assertEqual(False, b.endswith(x, None, None))
-
- self.assertEqual(True, b.startswith(h, None))
- self.assertEqual(True, b.startswith(l, -2, None))
- self.assertEqual(True, b.startswith(h, None, -2))
- self.assertEqual(False, b.startswith(x, None, None))
-
- def test_integer_arguments_out_of_byte_range(self):
- b = self.type2test(b'hello')
-
- for method in (b.count, b.find, b.index, b.rfind, b.rindex):
- self.assertRaises(ValueError, method, -1)
- self.assertRaises(ValueError, method, 256)
- self.assertRaises(ValueError, method, 9999)
-
- def test_find_etc_raise_correct_error_messages(self):
- # issue 11828
- b = self.type2test(b'hello')
- x = self.type2test(b'x')
- self.assertRaisesRegex(TypeError, r'\bfind\b', b.find,
- x, None, None, None)
- self.assertRaisesRegex(TypeError, r'\brfind\b', b.rfind,
- x, None, None, None)
- self.assertRaisesRegex(TypeError, r'\bindex\b', b.index,
- x, None, None, None)
- self.assertRaisesRegex(TypeError, r'\brindex\b', b.rindex,
- x, None, None, None)
- self.assertRaisesRegex(TypeError, r'\bcount\b', b.count,
- x, None, None, None)
- self.assertRaisesRegex(TypeError, r'\bstartswith\b', b.startswith,
- x, None, None, None)
- self.assertRaisesRegex(TypeError, r'\bendswith\b', b.endswith,
- x, None, None, None)
-
-
-@unittest.expectedFailure
-class BytesTest(BaseBytesTest, unittest.TestCase):
- type2test = bytes
-
- def test_buffer_is_readonly(self):
- fd = os.dup(sys.stdin.fileno())
- with open(fd, "rb", buffering=0) as f:
- self.assertRaises(TypeError, f.readinto, b"")
-
- def test_custom(self):
- class A:
- def __bytes__(self):
- return b'abc'
- self.assertEqual(bytes(A()), b'abc')
- class A: pass
- self.assertRaises(TypeError, bytes, A())
- class A:
- def __bytes__(self):
- return None
- self.assertRaises(TypeError, bytes, A())
- class A:
- def __bytes__(self):
- return b'a'
- def __index__(self):
- return 42
- self.assertEqual(bytes(A()), b'a')
-
- # Test PyBytes_FromFormat()
- def test_from_format(self):
- test.support.import_module('ctypes')
- from ctypes import pythonapi, py_object, c_int, c_char_p
- PyBytes_FromFormat = pythonapi.PyBytes_FromFormat
- PyBytes_FromFormat.restype = py_object
-
- self.assertEqual(PyBytes_FromFormat(b'format'),
- b'format')
-
- self.assertEqual(PyBytes_FromFormat(b'%'), b'%')
- self.assertEqual(PyBytes_FromFormat(b'%%'), b'%')
- self.assertEqual(PyBytes_FromFormat(b'%%s'), b'%s')
- self.assertEqual(PyBytes_FromFormat(b'[%%]'), b'[%]')
- self.assertEqual(PyBytes_FromFormat(b'%%%c', c_int(ord('_'))), b'%_')
-
- self.assertEqual(PyBytes_FromFormat(b'c:%c', c_int(255)),
- b'c:\xff')
- self.assertEqual(PyBytes_FromFormat(b's:%s', c_char_p(b'cstr')),
- b's:cstr')
-
-
-@unittest.expectedFailure
-class ByteArrayTest(BaseBytesTest, unittest.TestCase):
- type2test = bytearray
-
- def test_nohash(self):
- self.assertRaises(TypeError, hash, bytearray())
-
- def test_bytearray_api(self):
- short_sample = b"Hello world\n"
- sample = short_sample + b"\0"*(20 - len(short_sample))
- tfn = tempfile.mktemp()
- try:
- # Prepare
- with open(tfn, "wb") as f:
- f.write(short_sample)
- # Test readinto
- with open(tfn, "rb") as f:
- b = bytearray(20)
- n = f.readinto(b)
- self.assertEqual(n, len(short_sample))
- self.assertEqual(list(b), list(sample))
- # Test writing in binary mode
- with open(tfn, "wb") as f:
- f.write(b)
- with open(tfn, "rb") as f:
- self.assertEqual(f.read(), sample)
- # Text mode is ambiguous; don't test
- finally:
- try:
- os.remove(tfn)
- except os.error:
- pass
-
- def test_reverse(self):
- b = bytearray(b'hello')
- self.assertEqual(b.reverse(), None)
- self.assertEqual(b, b'olleh')
- b = bytearray(b'hello1') # test even number of items
- b.reverse()
- self.assertEqual(b, b'1olleh')
- b = bytearray()
- b.reverse()
- self.assertFalse(b)
-
- def test_clear(self):
- b = bytearray(b'python')
- b.clear()
- self.assertEqual(b, b'')
-
- b = bytearray(b'')
- b.clear()
- self.assertEqual(b, b'')
-
- b = bytearray(b'')
- b.append(ord('r'))
- b.clear()
- b.append(ord('p'))
- self.assertEqual(b, b'p')
-
- def test_copy(self):
- b = bytearray(b'abc')
- bb = b.copy()
- self.assertEqual(bb, b'abc')
-
- b = bytearray(b'')
- bb = b.copy()
- self.assertEqual(bb, b'')
-
- # test that it's indeed a copy and not a reference
- b = bytearray(b'abc')
- bb = b.copy()
- self.assertEqual(b, bb)
- self.assertIsNot(b, bb)
- bb.append(ord('d'))
- self.assertEqual(bb, b'abcd')
- self.assertEqual(b, b'abc')
-
- def test_regexps(self):
- def by(s):
- return bytearray(map(ord, s))
- b = by("Hello, world")
- self.assertEqual(re.findall(br"\w+", b), [by("Hello"), by("world")])
-
- def test_setitem(self):
- b = bytearray([1, 2, 3])
- b[1] = 100
- self.assertEqual(b, bytearray([1, 100, 3]))
- b[-1] = 200
- self.assertEqual(b, bytearray([1, 100, 200]))
- b[0] = Indexable(10)
- self.assertEqual(b, bytearray([10, 100, 200]))
- try:
- b[3] = 0
- self.fail("Didn't raise IndexError")
- except IndexError:
- pass
- try:
- b[-10] = 0
- self.fail("Didn't raise IndexError")
- except IndexError:
- pass
- try:
- b[0] = 256
- self.fail("Didn't raise ValueError")
- except ValueError:
- pass
- try:
- b[0] = Indexable(-1)
- self.fail("Didn't raise ValueError")
- except ValueError:
- pass
- try:
- b[0] = None
- self.fail("Didn't raise TypeError")
- except TypeError:
- pass
-
- def test_delitem(self):
- b = bytearray(range(10))
- del b[0]
- self.assertEqual(b, bytearray(range(1, 10)))
- del b[-1]
- self.assertEqual(b, bytearray(range(1, 9)))
- del b[4]
- self.assertEqual(b, bytearray([1, 2, 3, 4, 6, 7, 8]))
-
- def test_setslice(self):
- b = bytearray(range(10))
- self.assertEqual(list(b), list(range(10)))
-
- b[0:5] = bytearray([1, 1, 1, 1, 1])
- self.assertEqual(b, bytearray([1, 1, 1, 1, 1, 5, 6, 7, 8, 9]))
-
- del b[0:-5]
- self.assertEqual(b, bytearray([5, 6, 7, 8, 9]))
-
- b[0:0] = bytearray([0, 1, 2, 3, 4])
- self.assertEqual(b, bytearray(range(10)))
-
- b[-7:-3] = bytearray([100, 101])
- self.assertEqual(b, bytearray([0, 1, 2, 100, 101, 7, 8, 9]))
-
- b[3:5] = [3, 4, 5, 6]
- self.assertEqual(b, bytearray(range(10)))
-
- b[3:0] = [42, 42, 42]
- self.assertEqual(b, bytearray([0, 1, 2, 42, 42, 42, 3, 4, 5, 6, 7, 8, 9]))
-
- b[3:] = b'foo'
- self.assertEqual(b, bytearray([0, 1, 2, 102, 111, 111]))
-
- b[:3] = memoryview(b'foo')
- self.assertEqual(b, bytearray([102, 111, 111, 102, 111, 111]))
-
- b[3:4] = []
- self.assertEqual(b, bytearray([102, 111, 111, 111, 111]))
-
- for elem in [5, -5, 0, int(10e20), 'str', 2.3,
- ['a', 'b'], [b'a', b'b'], [[]]]:
- with self.assertRaises(TypeError):
- b[3:4] = elem
-
- for elem in [[254, 255, 256], [-256, 9000]]:
- with self.assertRaises(ValueError):
- b[3:4] = elem
-
- def test_extended_set_del_slice(self):
- indices = (0, None, 1, 3, 19, 300, 1<<333, -1, -2, -31, -300)
- for start in indices:
- for stop in indices:
- # Skip invalid step 0
- for step in indices[1:]:
- L = list(range(255))
- b = bytearray(L)
- # Make sure we have a slice of exactly the right length,
- # but with different data.
- data = L[start:stop:step]
- data.reverse()
- L[start:stop:step] = data
- b[start:stop:step] = data
- self.assertEqual(b, bytearray(L))
-
- del L[start:stop:step]
- del b[start:stop:step]
- self.assertEqual(b, bytearray(L))
-
- def test_setslice_trap(self):
- # This test verifies that we correctly handle assigning self
- # to a slice of self (the old Lambert Meertens trap).
- b = bytearray(range(256))
- b[8:] = b
- self.assertEqual(b, bytearray(list(range(8)) + list(range(256))))
-
- def test_iconcat(self):
- b = bytearray(b"abc")
- b1 = b
- b += b"def"
- self.assertEqual(b, b"abcdef")
- self.assertEqual(b, b1)
- self.assertTrue(b is b1)
- b += b"xyz"
- self.assertEqual(b, b"abcdefxyz")
- try:
- b += ""
- except TypeError:
- pass
- else:
- self.fail("bytes += unicode didn't raise TypeError")
-
- def test_irepeat(self):
- b = bytearray(b"abc")
- b1 = b
- b *= 3
- self.assertEqual(b, b"abcabcabc")
- self.assertEqual(b, b1)
- self.assertTrue(b is b1)
-
- def test_irepeat_1char(self):
- b = bytearray(b"x")
- b1 = b
- b *= 100
- self.assertEqual(b, b"x"*100)
- self.assertEqual(b, b1)
- self.assertTrue(b is b1)
-
- def test_alloc(self):
- b = bytearray()
- alloc = b.__alloc__()
- self.assertTrue(alloc >= 0)
- seq = [alloc]
- for i in range(100):
- b += b"x"
- alloc = b.__alloc__()
- self.assertTrue(alloc >= len(b))
- if alloc not in seq:
- seq.append(alloc)
-
- def test_extend(self):
- orig = b'hello'
- a = bytearray(orig)
- a.extend(a)
- self.assertEqual(a, orig + orig)
- self.assertEqual(a[5:], orig)
- a = bytearray(b'')
- # Test iterators that don't have a __length_hint__
- a.extend(map(int, orig * 25))
- a.extend(int(x) for x in orig * 25)
- self.assertEqual(a, orig * 50)
- self.assertEqual(a[-5:], orig)
- a = bytearray(b'')
- a.extend(iter(map(int, orig * 50)))
- self.assertEqual(a, orig * 50)
- self.assertEqual(a[-5:], orig)
- a = bytearray(b'')
- a.extend(list(map(int, orig * 50)))
- self.assertEqual(a, orig * 50)
- self.assertEqual(a[-5:], orig)
- a = bytearray(b'')
- self.assertRaises(ValueError, a.extend, [0, 1, 2, 256])
- self.assertRaises(ValueError, a.extend, [0, 1, 2, -1])
- self.assertEqual(len(a), 0)
- a = bytearray(b'')
- a.extend([Indexable(ord('a'))])
- self.assertEqual(a, b'a')
-
- def test_remove(self):
- b = bytearray(b'hello')
- b.remove(ord('l'))
- self.assertEqual(b, b'helo')
- b.remove(ord('l'))
- self.assertEqual(b, b'heo')
- self.assertRaises(ValueError, lambda: b.remove(ord('l')))
- self.assertRaises(ValueError, lambda: b.remove(400))
- self.assertRaises(TypeError, lambda: b.remove('e'))
- # remove first and last
- b.remove(ord('o'))
- b.remove(ord('h'))
- self.assertEqual(b, b'e')
- self.assertRaises(TypeError, lambda: b.remove(b'e'))
- b.remove(Indexable(ord('e')))
- self.assertEqual(b, b'')
-
- def test_pop(self):
- b = bytearray(b'world')
- self.assertEqual(b.pop(), ord('d'))
- self.assertEqual(b.pop(0), ord('w'))
- self.assertEqual(b.pop(-2), ord('r'))
- self.assertRaises(IndexError, lambda: b.pop(10))
- self.assertRaises(IndexError, lambda: bytearray().pop())
- # test for issue #6846
- self.assertEqual(bytearray(b'\xff').pop(), 0xff)
-
- def test_nosort(self):
- self.assertRaises(AttributeError, lambda: bytearray().sort())
-
- def test_append(self):
- b = bytearray(b'hell')
- b.append(ord('o'))
- self.assertEqual(b, b'hello')
- self.assertEqual(b.append(100), None)
- b = bytearray()
- b.append(ord('A'))
- self.assertEqual(len(b), 1)
- self.assertRaises(TypeError, lambda: b.append(b'o'))
- b = bytearray()
- b.append(Indexable(ord('A')))
- self.assertEqual(b, b'A')
-
- def test_insert(self):
- b = bytearray(b'msssspp')
- b.insert(1, ord('i'))
- b.insert(4, ord('i'))
- b.insert(-2, ord('i'))
- b.insert(1000, ord('i'))
- self.assertEqual(b, b'mississippi')
- self.assertRaises(TypeError, lambda: b.insert(0, b'1'))
- b = bytearray()
- b.insert(0, Indexable(ord('A')))
- self.assertEqual(b, b'A')
-
- def test_copied(self):
- # Issue 4348. Make sure that operations that don't mutate the array
- # copy the bytes.
- b = bytearray(b'abc')
- self.assertFalse(b is b.replace(b'abc', b'cde', 0))
-
- t = bytearray([i for i in range(256)])
- x = bytearray(b'')
- self.assertFalse(x is x.translate(t))
-
- def test_partition_bytearray_doesnt_share_nullstring(self):
- a, b, c = bytearray(b"x").partition(b"y")
- self.assertEqual(b, b"")
- self.assertEqual(c, b"")
- self.assertTrue(b is not c)
- b += b"!"
- self.assertEqual(c, b"")
- a, b, c = bytearray(b"x").partition(b"y")
- self.assertEqual(b, b"")
- self.assertEqual(c, b"")
- # Same for rpartition
- b, c, a = bytearray(b"x").rpartition(b"y")
- self.assertEqual(b, b"")
- self.assertEqual(c, b"")
- self.assertTrue(b is not c)
- b += b"!"
- self.assertEqual(c, b"")
- c, b, a = bytearray(b"x").rpartition(b"y")
- self.assertEqual(b, b"")
- self.assertEqual(c, b"")
-
- def test_resize_forbidden(self):
- # #4509: can't resize a bytearray when there are buffer exports, even
- # if it wouldn't reallocate the underlying buffer.
- # Furthermore, no destructive changes to the buffer may be applied
- # before raising the error.
- b = bytearray(range(10))
- v = memoryview(b)
- def resize(n):
- b[1:-1] = range(n + 1, 2*n - 1)
- resize(10)
- orig = b[:]
- self.assertRaises(BufferError, resize, 11)
- self.assertEqual(b, orig)
- self.assertRaises(BufferError, resize, 9)
- self.assertEqual(b, orig)
- self.assertRaises(BufferError, resize, 0)
- self.assertEqual(b, orig)
- # Other operations implying resize
- self.assertRaises(BufferError, b.pop, 0)
- self.assertEqual(b, orig)
- self.assertRaises(BufferError, b.remove, b[1])
- self.assertEqual(b, orig)
- def delitem():
- del b[1]
- self.assertRaises(BufferError, delitem)
- self.assertEqual(b, orig)
- # deleting a non-contiguous slice
- def delslice():
- b[1:-1:2] = b""
- self.assertRaises(BufferError, delslice)
- self.assertEqual(b, orig)
-
-
-@unittest.expectedFailure
-class AssortedBytesTest(unittest.TestCase):
- #
- # Test various combinations of bytes and bytearray
- #
-
- @check_bytes_warnings
- def test_repr_str(self):
- for f in str, repr:
- self.assertEqual(f(bytearray()), "bytearray(b'')")
- self.assertEqual(f(bytearray([0])), "bytearray(b'\\x00')")
- self.assertEqual(f(bytearray([0, 1, 254, 255])),
- "bytearray(b'\\x00\\x01\\xfe\\xff')")
- self.assertEqual(f(b"abc"), "b'abc'")
- self.assertEqual(f(b"'"), '''b"'"''') # '''
- self.assertEqual(f(b"'\""), r"""b'\'"'""") # '
-
- def test_compare_bytes_to_bytearray(self):
- self.assertEqual(b"abc" == bytes(b"abc"), True)
- self.assertEqual(b"ab" != bytes(b"abc"), True)
- self.assertEqual(b"ab" <= bytes(b"abc"), True)
- self.assertEqual(b"ab" < bytes(b"abc"), True)
- self.assertEqual(b"abc" >= bytes(b"ab"), True)
- self.assertEqual(b"abc" > bytes(b"ab"), True)
-
- self.assertEqual(b"abc" != bytes(b"abc"), False)
- self.assertEqual(b"ab" == bytes(b"abc"), False)
- self.assertEqual(b"ab" > bytes(b"abc"), False)
- self.assertEqual(b"ab" >= bytes(b"abc"), False)
- self.assertEqual(b"abc" < bytes(b"ab"), False)
- self.assertEqual(b"abc" <= bytes(b"ab"), False)
-
- self.assertEqual(bytes(b"abc") == b"abc", True)
- self.assertEqual(bytes(b"ab") != b"abc", True)
- self.assertEqual(bytes(b"ab") <= b"abc", True)
- self.assertEqual(bytes(b"ab") < b"abc", True)
- self.assertEqual(bytes(b"abc") >= b"ab", True)
- self.assertEqual(bytes(b"abc") > b"ab", True)
-
- self.assertEqual(bytes(b"abc") != b"abc", False)
- self.assertEqual(bytes(b"ab") == b"abc", False)
- self.assertEqual(bytes(b"ab") > b"abc", False)
- self.assertEqual(bytes(b"ab") >= b"abc", False)
- self.assertEqual(bytes(b"abc") < b"ab", False)
- self.assertEqual(bytes(b"abc") <= b"ab", False)
-
- @test.support.requires_docstrings
- def test_doc(self):
- self.assertIsNotNone(bytearray.__doc__)
- self.assertTrue(bytearray.__doc__.startswith("bytearray("), bytearray.__doc__)
- self.assertIsNotNone(bytes.__doc__)
- self.assertTrue(bytes.__doc__.startswith("bytes("), bytes.__doc__)
-
- def test_from_bytearray(self):
- sample = bytes(b"Hello world\n\x80\x81\xfe\xff")
- buf = memoryview(sample)
- b = bytearray(buf)
- self.assertEqual(b, bytearray(sample))
-
- @check_bytes_warnings
- def test_to_str(self):
- self.assertEqual(str(b''), "b''")
- self.assertEqual(str(b'x'), "b'x'")
- self.assertEqual(str(b'\x80'), "b'\\x80'")
- self.assertEqual(str(bytearray(b'')), "bytearray(b'')")
- self.assertEqual(str(bytearray(b'x')), "bytearray(b'x')")
- self.assertEqual(str(bytearray(b'\x80')), "bytearray(b'\\x80')")
-
- def test_literal(self):
- tests = [
- (b"Wonderful spam", "Wonderful spam"),
- (br"Wonderful spam too", "Wonderful spam too"),
- (b"\xaa\x00\000\200", "\xaa\x00\000\200"),
- (br"\xaa\x00\000\200", r"\xaa\x00\000\200"),
- ]
- for b, s in tests:
- self.assertEqual(b, bytearray(s, 'latin-1'))
- for c in range(128, 256):
- self.assertRaises(SyntaxError, eval,
- 'b"%s"' % chr(c))
-
- def test_translate(self):
- b = b'hello'
- ba = bytearray(b)
- rosetta = bytearray(range(0, 256))
- rosetta[ord('o')] = ord('e')
- c = b.translate(rosetta, b'l')
- self.assertEqual(b, b'hello')
- self.assertEqual(c, b'hee')
- c = ba.translate(rosetta, b'l')
- self.assertEqual(ba, b'hello')
- self.assertEqual(c, b'hee')
- c = b.translate(None, b'e')
- self.assertEqual(c, b'hllo')
- c = ba.translate(None, b'e')
- self.assertEqual(c, b'hllo')
- self.assertRaises(TypeError, b.translate, None, None)
- self.assertRaises(TypeError, ba.translate, None, None)
-
- def test_split_bytearray(self):
- self.assertEqual(b'a b'.split(memoryview(b' ')), [b'a', b'b'])
-
- def test_rsplit_bytearray(self):
- self.assertEqual(b'a b'.rsplit(memoryview(b' ')), [b'a', b'b'])
-
- def test_return_self(self):
- # bytearray.replace must always return a new bytearray
- b = bytearray()
- self.assertFalse(b.replace(b'', b'') is b)
-
- def test_compare(self):
- if sys.flags.bytes_warning:
- def bytes_warning():
- return test.support.check_warnings(('', BytesWarning))
- with bytes_warning():
- b'' == ''
- with bytes_warning():
- b'' != ''
- with bytes_warning():
- bytearray(b'') == ''
- with bytes_warning():
- bytearray(b'') != ''
- else:
- self.skipTest("BytesWarning is needed for this test: use -bb option")
-
- # Optimizations:
- # __iter__? (optimization)
- # __reversed__? (optimization)
-
- # XXX More string methods? (Those that don't use character properties)
-
- # There are tests in string_tests.py that are more
- # comprehensive for things like split, partition, etc.
- # Unfortunately they are all bundled with tests that
- # are not appropriate for bytes
-
- # I've started porting some of those into bytearray_tests.py, we should port
- # the rest that make sense (the code can be cleaned up to use modern
- # unittest methods at the same time).
-
-@unittest.expectedFailure
-class BytearrayPEP3137Test(unittest.TestCase,
- test.buffer_tests.MixinBytesBufferCommonTests):
- def marshal(self, x):
- return bytearray(x)
-
- def test_returns_new_copy(self):
- val = self.marshal(b'1234')
- # On immutable types these MAY return a reference to themselves
- # but on mutable types like bytearray they MUST return a new copy.
- for methname in ('zfill', 'rjust', 'ljust', 'center'):
- method = getattr(val, methname)
- newval = method(3)
- self.assertEqual(val, newval)
- self.assertTrue(val is not newval,
- methname+' returned self on a mutable object')
- for expr in ('val.split()[0]', 'val.rsplit()[0]',
- 'val.partition(b".")[0]', 'val.rpartition(b".")[2]',
- 'val.splitlines()[0]', 'val.replace(b"", b"")'):
- newval = eval(expr)
- self.assertEqual(val, newval)
- self.assertTrue(val is not newval,
- expr+' returned val on a mutable object')
-
-class FixedStringTest(test.string_tests.BaseTest):
-
- def fixtype(self, obj):
- if isinstance(obj, str):
- return obj.encode("utf-8")
- return super().fixtype(obj)
-
- # Currently the bytes containment testing uses a single integer
- # value. This may not be the final design, but until then the
- # bytes section with in a bytes containment not valid
- def test_contains(self):
- pass
- def test_expandtabs(self):
- pass
- def test_upper(self):
- pass
- def test_lower(self):
- pass
-
-@unittest.expectedFailure
-class ByteArrayAsStringTest(FixedStringTest, unittest.TestCase):
- type2test = bytearray
- contains_bytes = True
-
-@unittest.expectedFailure
-class BytesAsStringTest(FixedStringTest, unittest.TestCase):
- type2test = bytes
- contains_bytes = True
-
-
-class SubclassTest:
-
- def test_basic(self):
- self.assertTrue(issubclass(self.subclass2test, self.type2test))
- self.assertIsInstance(self.subclass2test(), self.type2test)
-
- a, b = b"abcd", b"efgh"
- _a, _b = self.subclass2test(a), self.subclass2test(b)
-
- # test comparison operators with subclass instances
- self.assertTrue(_a == _a)
- self.assertTrue(_a != _b)
- self.assertTrue(_a < _b)
- self.assertTrue(_a <= _b)
- self.assertTrue(_b >= _a)
- self.assertTrue(_b > _a)
- self.assertTrue(_a is not a)
-
- # test concat of subclass instances
- self.assertEqual(a + b, _a + _b)
- self.assertEqual(a + b, a + _b)
- self.assertEqual(a + b, _a + b)
-
- # test repeat
- self.assertTrue(a*5 == _a*5)
-
- def test_join(self):
- # Make sure join returns a NEW object for single item sequences
- # involving a subclass.
- # Make sure that it is of the appropriate type.
- s1 = self.subclass2test(b"abcd")
- s2 = self.type2test().join([s1])
- self.assertTrue(s1 is not s2)
- self.assertTrue(type(s2) is self.type2test, type(s2))
-
- # Test reverse, calling join on subclass
- s3 = s1.join([b"abcd"])
- self.assertTrue(type(s3) is self.type2test)
-
- def test_pickle(self):
- a = self.subclass2test(b"abcd")
- a.x = 10
- a.y = self.subclass2test(b"efgh")
- for proto in range(pickle.HIGHEST_PROTOCOL + 1):
- b = pickle.loads(pickle.dumps(a, proto))
- self.assertNotEqual(id(a), id(b))
- self.assertEqual(a, b)
- self.assertEqual(a.x, b.x)
- self.assertEqual(a.y, b.y)
- self.assertEqual(type(a), type(b))
- self.assertEqual(type(a.y), type(b.y))
-
- def test_copy(self):
- a = self.subclass2test(b"abcd")
- a.x = 10
- a.y = self.subclass2test(b"efgh")
- for copy_method in (copy.copy, copy.deepcopy):
- b = copy_method(a)
- self.assertNotEqual(id(a), id(b))
- self.assertEqual(a, b)
- self.assertEqual(a.x, b.x)
- self.assertEqual(a.y, b.y)
- self.assertEqual(type(a), type(b))
- self.assertEqual(type(a.y), type(b.y))
-
-
-class ByteArraySubclass(bytearray):
- pass
-
-class BytesSubclass(bytes):
- pass
-
-@unittest.expectedFailure
-class ByteArraySubclassTest(SubclassTest, unittest.TestCase):
- type2test = bytearray
- subclass2test = ByteArraySubclass
-
- def test_init_override(self):
- class subclass(bytearray):
- def __init__(me, newarg=1, *args, **kwargs):
- bytearray.__init__(me, *args, **kwargs)
- x = subclass(4, b"abcd")
- x = subclass(4, source=b"abcd")
- self.assertEqual(x, b"abcd")
- x = subclass(newarg=4, source=b"abcd")
- self.assertEqual(x, b"abcd")
-
-
-@unittest.expectedFailure
-class BytesSubclassTest(SubclassTest, unittest.TestCase):
- type2test = bytes
- subclass2test = BytesSubclass
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/future/tests/test_builtins.py b/future/tests/test_builtins.py
deleted file mode 100644
index c24b295f..00000000
--- a/future/tests/test_builtins.py
+++ /dev/null
@@ -1,159 +0,0 @@
-"""
-Tests to make sure the behaviour of the builtins is sensible and correct.
-"""
-
-from __future__ import absolute_import, division, unicode_literals
-from future.builtins import *
-from future.utils import PY3
-from future.tests.base import unittest
-
-import textwrap
-from subprocess import Popen, PIPE
-from numbers import Integral
-from decimal import Decimal
-
-
-class TestBuiltins(unittest.TestCase):
- def test_super(self):
- class verbose_list(list):
- '''
- A class that uses the new simpler super() function
- '''
- def append(self, item):
- print('Adding an item')
- super().append(item)
-
- l = verbose_list()
- l.append('blah')
- self.assertEqual(l[0], 'blah')
- self.assertEqual(len(l), 1)
- self.assertTrue(isinstance(l, list))
-
- def test_isinstance_int(self):
- """
- Redefining ``int`` to a ``long`` subclass on Py2 makes this
- test fail unless isinstance() is defined appropriately:
- """
- self.assertTrue(isinstance(0, int))
- self.assertTrue(isinstance(int(1), int))
- self.assertFalse(isinstance(1.0, int))
-
- def test_isinstance_Integral(self):
- """
- Tests the preferred alternative to the above
- """
- self.assertTrue(isinstance(0, Integral))
-
- def test_isinstance_long(self):
- """
- Py2's long doesn't inherit from int!
- """
- self.assertTrue(isinstance(10**100, int))
- self.assertTrue(isinstance(int(2**64), int))
- if not PY3:
- self.assertTrue(isinstance(long(1), int))
- # Note: the following is a SyntaxError on Py3:
- # self.assertTrue(isinstance(1L, int))
-
- def test_isinstance_bytes(self):
- self.assertTrue(isinstance(b'byte-string', bytes))
- self.assertFalse(isinstance(b'byte-string', str))
-
- def test_isinstance_str(self):
- self.assertTrue(isinstance('string', str))
- self.assertTrue(isinstance(u'string', str))
- self.assertFalse(isinstance(u'string', bytes))
-
- @unittest.expectedFailure
- def test_type(self):
- """
- The following fails when passed a unicode string on Python
- (including when unicode_literals is in effect) and fails when
- passed a byte-string on Python 3. So type() always wants a native
- string as the first argument.
-
- TODO: maybe provide a replacement that works identically on Py2/3?
- """
- mytype = type('blah', (dict,), {"old": 1, "new": 2})
- d = mytype()
- self.assertTrue(isinstance(d, mytype))
- self.assertTrue(isinstance(d, dict))
-
- def test_isinstance_tuple_of_types(self):
- # These two should be equivalent, even if ``int`` is a special
- # backported type.
- label = 1
- self.assertTrue(isinstance(label, (float, Decimal)) or
- isinstance(label, int))
- self.assertTrue(isinstance(label, (float, Decimal, int)))
- self.assertTrue(isinstance(10**100, (float, Decimal, int)))
-
- self.assertTrue(isinstance(b'blah', (str, bytes)))
- self.assertTrue(isinstance(b'blah', (bytes, float, int)))
-
- self.assertFalse(isinstance(b'blah', (str, Decimal, float, int)))
-
- self.assertTrue(isinstance('blah', (str, Decimal, float, int)))
- self.assertTrue(isinstance(u'blah', (Decimal, float, int, str)))
-
- self.assertFalse(isinstance('blah', (bytes, Decimal, float, int)))
-
- @unittest.skipIf(sys.version_info[:2] == (2, 6),
- 'not yet implemented for Py2.6')
- def test_round(self):
- """
- Note that the Python 2.x round() function fails these tests. The
- Python 3.x round() function passes them, as should our custom
- round() function.
- """
- self.assertEqual(round(0.1250, 2), 0.12)
- self.assertEqual(round(0.1350, 2), 0.14)
- self.assertEqual(round(0.1251, 2), 0.13)
- self.assertEqual(round(0.125000001, 2), 0.13)
- self.assertEqual(round(123.5, 0), 124.0)
- self.assertEqual(round(123.5), 124)
- self.assertEqual(round(12.35, 2), 12.35)
- self.assertEqual(round(12.35, 1), 12.3)
- self.assertEqual(round(12.35, 0), 12.0)
- self.assertEqual(round(123.5, 1), 123.5)
-
- self.assertTrue(isinstance(round(123.5, 0), float))
- self.assertTrue(isinstance(round(123.5), Integral))
-
- @unittest.skip('negative ndigits not implemented yet')
- def test_round_negative_ndigits(self):
- self.assertEqual(round(10.1350, 0), 10.0)
- self.assertEqual(round(10.1350, -1), 10.0)
- self.assertEqual(round(10.1350, -2), 0.0)
- self.assertEqual(round(10.1350, -3), 0.0)
-
- self.assertEqual(round(12.35, -1), 10.0)
- self.assertEqual(round(12.35, -2), 0.0)
- self.assertEqual(round(123.5, -1), 120.0)
- self.assertEqual(round(123.5, -2), 100.0)
- self.assertEqual(round(123.551, -2), 100.0)
- self.assertEqual(round(123.551, -3), 0.0)
-
- def test_input(self, interpreter='python2'):
- """
- Passes in a string to the waiting input()
- """
- code = '''
- from future.builtins import input
- def greet(name):
- print "Hello, {0}!".format(name)
- print "What's your name?"
- name = input()
- greet(name)
- '''
- with open('mytestscript.py', 'w') as f:
- f.write(textwrap.dedent(code))
- p1 = Popen([interpreter, 'mytestscript.py'], stdout=PIPE, stdin=PIPE, stderr=None)
- (stdout, stderr) = p1.communicate(b'Ed')
- # print(stdout)
- # print(stderr)
- self.assertEqual(stdout, b"What's your name?\nHello, Ed!\n")
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/future/tests/test_bytes.py b/future/tests/test_bytes.py
deleted file mode 100644
index 882fcfa8..00000000
--- a/future/tests/test_bytes.py
+++ /dev/null
@@ -1,434 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Tests for the backported bytes object
-"""
-
-from __future__ import absolute_import, unicode_literals, print_function
-from future.builtins import *
-from future import utils
-
-from numbers import Integral
-from future.tests.base import unittest
-
-
-TEST_UNICODE_STR = u'ℝεα∂@ßʟ℮ ☂ℯṧт υηḯ¢☺ḓ℮'
-# Tk icon as a .gif:
-TEST_BYTE_STR = b'GIF89a\x0e\x00\x0b\x00\x80\xff\x00\xff\x00\x00\xc0\xc0\xc0!\xf9\x04\x01\x00\x00\x01\x00,\x00\x00\x00\x00\x0e\x00\x0b\x00@\x02\x1f\x0c\x8e\x10\xbb\xcan\x90\x99\xaf&\xd8\x1a\xce\x9ar\x06F\xd7\xf1\x90\xa1c\x9e\xe8\x84\x99\x89\x97\xa2J\x01\x00;\x1a\x14\x00;;\xba\nD\x14\x00\x00;;'
-
-
-class TestBytes(unittest.TestCase):
- def test_bytes_encoding_arg(self):
- """
- The bytes class has changed in Python 3 to accept an
- additional argument in the constructor: encoding.
-
- It would be nice to support this without breaking the
- isinstance(..., bytes) test below.
- """
- u = u'Unicode string: \u5b54\u5b50'
- b = bytes(u, encoding='utf-8')
- self.assertEqual(b, u.encode('utf-8'))
-
- def test_bytes_string_no_encoding(self):
- with self.assertRaises(TypeError):
- bytes(u'ABC')
-
- def test_bytes_int(self):
- """
- In Py3, bytes(int) -> bytes object of size given by the parameter initialized with null
- """
- self.assertEqual(bytes(5), b'\x00\x00\x00\x00\x00')
- # Test using newint:
- self.assertEqual(bytes(int(5)), b'\x00\x00\x00\x00\x00')
- self.assertTrue(isinstance(bytes(int(5)), bytes))
-
- # Negative counts are not allowed in Py3:
- with self.assertRaises(ValueError):
- bytes(-1)
- with self.assertRaises(ValueError):
- bytes(int(-1))
-
- @unittest.skipIf(utils.PY3, 'test not needed on Py3: all ints are long')
- def test_bytes_long(self):
- """
- As above, but explicitly feeding in a long on Py2. Note that
- checks like:
- isinstance(n, int)
- are fragile on Py2, because isinstance(10L, int) is False.
- """
- m = long(5)
- n = long(-1)
- self.assertEqual(bytes(m), b'\x00\x00\x00\x00\x00')
- # Negative counts are not allowed in Py3:
- with self.assertRaises(ValueError):
- bytes(n)
-
- def test_bytes_empty(self):
- """
- bytes() -> b''
- """
- self.assertEqual(bytes(), b'')
-
- def test_bytes_iterable_of_ints(self):
- self.assertEqual(bytes([65, 66, 67]), b'ABC')
- self.assertEqual(bytes([int(120), int(121), int(122)]), b'xyz')
-
- def test_bytes_bytes(self):
- self.assertEqual(bytes(b'ABC'), b'ABC')
-
- def test_bytes_is_bytes(self):
- b = bytes(b'ABC')
- self.assertTrue(bytes(b) is b)
- self.assertEqual(repr(bytes(b)), "b'ABC'")
-
- def test_bytes_fromhex(self):
- self.assertEqual(bytes.fromhex('bb 0f'), b'\xbb\x0f')
- self.assertEqual(bytes.fromhex('1234'), b'\x124')
- self.assertEqual(bytes.fromhex('12ffa0'), b'\x12\xff\xa0')
- b = b'My bytestring'
- self.assertEqual(bytes(b).fromhex('bb 0f'), b'\xbb\x0f')
-
- def test_isinstance_bytes(self):
- self.assertTrue(isinstance(bytes(b'blah'), bytes))
-
- @unittest.expectedFailure
- def test_isinstance_oldbytestrings_bytes(self):
- """
- Watch out for this. Byte-strings produced in various places in Py2
- are of type 'str'. With 'from future.builtins import bytes', 'bytes'
- is redefined to be a subclass of 'str', not just an alias for 'str'.
- """
- self.assertTrue(isinstance(b'blah', bytes)) # not with the redefined bytes obj
- self.assertTrue(isinstance(u'blah'.encode('utf-8'), bytes)) # not with the redefined bytes obj
-
- def test_bytes_getitem(self):
- b = bytes(b'ABCD')
- self.assertEqual(b[0], 65)
- self.assertEqual(b[-1], 68)
- self.assertEqual(b[0:1], b'A')
- self.assertEqual(b[:], b'ABCD')
-
- @unittest.expectedFailure
- def test_b_literal_creates_newbytes_object(self):
- """
- It would nice if the b'' literal syntax could be coaxed into producing
- bytes objects somehow ... ;)
- """
- b = b'ABCD'
- self.assertTrue(isinstance(b, bytes))
- self.assertEqual(b[0], 65)
- self.assertTrue(repr(b).startswith('b'))
-
- def test_repr(self):
- b = bytes(b'ABCD')
- self.assertTrue(repr(b).startswith('b'))
-
- def test_str(self):
- b = bytes(b'ABCD')
- self.assertTrue(str(b), "b'ABCD'")
-
- def test_bytes_setitem(self):
- b = b'ABCD'
- with self.assertRaises(TypeError):
- b[0] = b'B'
-
- def test_bytes_iteration(self):
- b = bytes(b'ABCD')
- for item in b:
- self.assertTrue(isinstance(item, Integral))
- self.assertEqual(list(b), [65, 66, 67, 68])
-
- def test_bytes_plus_unicode_string(self):
- b = bytes(b'ABCD')
- u = u'EFGH'
- with self.assertRaises(TypeError):
- b + u
-
- with self.assertRaises(TypeError):
- u + b
-
- def test_bytes_plus_bytes(self):
- b1 = bytes(b'ABCD')
- b2 = b1 + b1
- self.assertEqual(b2, b'ABCDABCD')
- self.assertTrue(isinstance(b2, bytes))
-
- b3 = b1 + b'ZYXW'
- self.assertEqual(b3, b'ABCDZYXW')
- self.assertTrue(isinstance(b3, bytes))
-
- b4 = b'ZYXW' + b1
- self.assertEqual(b4, b'ZYXWABCD')
- self.assertTrue(isinstance(b4, bytes))
-
- def test_bytes_join_bytes(self):
- b = bytes(b' * ')
- strings = [b'AB', b'EFGH', b'IJKL']
- result = b.join(strings)
- self.assertEqual(result, b'AB * EFGH * IJKL')
- self.assertTrue(isinstance(result, bytes))
-
- def test_bytes_join_others(self):
- b = bytes(b' ')
- with self.assertRaises(TypeError):
- b.join([42])
- with self.assertRaises(TypeError):
- b.join(b'blah')
- with self.assertRaises(TypeError):
- b.join(bytes(b'blah'))
-
- def test_bytes_join_unicode_strings(self):
- b = bytes(b'ABCD')
- strings = [u'EFGH', u'IJKL']
- with self.assertRaises(TypeError):
- b.join(strings)
-
- def test_bytes_replace(self):
- b = bytes(b'ABCD')
- c = b.replace(b'A', b'F')
- self.assertEqual(c, b'FBCD')
- self.assertTrue(isinstance(c, bytes))
-
- with self.assertRaises(TypeError):
- b.replace(b'A', u'F')
- with self.assertRaises(TypeError):
- b.replace(u'A', b'F')
-
- def test_bytes_partition(self):
- b1 = bytes(b'ABCD')
- parts = b1.partition(b'B')
- self.assertEqual(parts, (b'A', b'B', b'CD'))
- self.assertTrue(all([isinstance(p, bytes) for p in parts]))
-
- b2 = bytes(b'ABCDABCD')
- parts = b2.partition(b'B')
- self.assertEqual(parts, (b'A', b'B', b'CDABCD'))
-
- def test_bytes_rpartition(self):
- b2 = bytes(b'ABCDABCD')
- parts = b2.rpartition(b'B')
- self.assertEqual(parts, (b'ABCDA', b'B', b'CD'))
- self.assertTrue(all([isinstance(p, bytes) for p in parts]))
-
- def test_bytes_contains_something(self):
- b = bytes(b'ABCD')
- self.assertTrue(b'A' in b)
- self.assertTrue(65 in b)
-
- self.assertTrue(b'AB' in b)
- self.assertTrue(bytes([65, 66]) in b)
-
- self.assertFalse(b'AC' in b)
- self.assertFalse(bytes([65, 67]) in b)
-
- self.assertFalse(b'Z' in b)
- self.assertFalse(99 in b)
-
- with self.assertRaises(TypeError):
- u'A' in b
-
- def test_bytes_index(self):
- b = bytes(b'ABCD')
- self.assertEqual(b.index(b'B'), 1)
- self.assertEqual(b.index(67), 2)
-
- def test_startswith(self):
- b = bytes(b'abcd')
- self.assertTrue(b.startswith(b'a'))
- self.assertTrue(b.startswith((b'a', b'b')))
- self.assertTrue(b.startswith(bytes(b'ab')))
- self.assertFalse(b.startswith((b'A', b'B')))
-
- with self.assertRaises(TypeError) as cm:
- b.startswith(65)
- with self.assertRaises(TypeError) as cm:
- b.startswith([b'A'])
- exc = str(cm.exception)
- # self.assertIn('bytes', exc)
- # self.assertIn('tuple', exc)
-
- def test_endswith(self):
- b = bytes(b'abcd')
- self.assertTrue(b.endswith(b'd'))
- self.assertTrue(b.endswith((b'c', b'd')))
- self.assertTrue(b.endswith(bytes(b'cd')))
- self.assertFalse(b.endswith((b'A', b'B')))
-
- with self.assertRaises(TypeError) as cm:
- b.endswith(65)
- with self.assertRaises(TypeError) as cm:
- b.endswith([b'D'])
- exc = str(cm.exception)
- # self.assertIn('bytes', exc)
- # self.assertIn('tuple', exc)
-
- def test_decode(self):
- b = bytes(b'abcd')
- s = b.decode('utf-8')
- self.assertEqual(s, 'abcd')
- self.assertTrue(isinstance(s, str))
-
- def test_encode(self):
- b = bytes(b'abcd')
- with self.assertRaises(AttributeError) as cm:
- b.encode('utf-8')
-
- def test_eq(self):
- """
- Equals: ==
- """
- b = bytes(b'ABCD')
- self.assertEqual(b, b'ABCD')
- self.assertTrue(b == b'ABCD')
- self.assertEqual(b'ABCD', b)
- self.assertEqual(b, b)
- self.assertFalse(b == b'ABC')
- self.assertFalse(b == bytes(b'ABC'))
- self.assertFalse(b == u'ABCD')
- self.assertFalse(b == str('ABCD'))
- # Fails:
- # self.assertFalse(u'ABCD' == b)
- self.assertFalse(str('ABCD') == b)
-
- self.assertFalse(b == list(b))
- self.assertFalse(b == str(b))
- self.assertFalse(b == u'ABC')
- self.assertFalse(bytes(b'Z') == 90)
-
- def test_ne(self):
- b = bytes(b'ABCD')
- self.assertFalse(b != b)
- self.assertFalse(b != b'ABCD')
- self.assertTrue(b != b'ABCDEFG')
- self.assertTrue(b != bytes(b'ABCDEFG'))
- self.assertTrue(b'ABCDEFG' != b)
-
- # self.assertTrue(b'ABCD' != u'ABCD')
- self.assertTrue(b != u'ABCD')
- self.assertTrue(b != u'ABCDE')
- self.assertTrue(bytes(b'') != str(u''))
- self.assertTrue(str(u'') != bytes(b''))
-
- self.assertTrue(b != list(b))
- self.assertTrue(b != str(b))
-
- def test_hash(self):
- d = {}
- b = bytes(b'ABCD')
- native_b = b'ABCD'
- s = str('ABCD')
- native_s = u'ABCD'
- d[b] = b
- d[s] = s
- self.assertEqual(len(d), 2)
- # This should overwrite d[s] but not d[b]:
- d[native_s] = native_s
- self.assertEqual(len(d), 2)
- # This should overwrite d[native_s] again:
- d[s] = s
- self.assertEqual(len(d), 2)
- self.assertEqual(set(d.keys()), set([s, b]))
-
- @unittest.expectedFailure
- def test_hash_with_native_types(self):
- # Warning: initializing the dict with native Py2 types throws the
- # hashing out:
- d = {u'ABCD': u'ABCD', b'ABCD': b'ABCD'}
- # On Py2: len(d) == 1
- b = bytes(b'ABCD')
- s = str('ABCD')
- d[s] = s
- d[b] = b
- # Fails:
- self.assertEqual(len(d) > 1)
-
- def test_add(self):
- b = bytes(b'ABC')
- c = bytes(b'XYZ')
- d = b + c
- self.assertTrue(isinstance(d, bytes))
- self.assertEqual(d, b'ABCXYZ')
- f = b + b'abc'
- self.assertTrue(isinstance(f, bytes))
- self.assertEqual(f, b'ABCabc')
- g = b'abc' + b
- self.assertTrue(isinstance(g, bytes))
- self.assertEqual(g, b'abcABC')
-
- def test_cmp(self):
- b = bytes(b'ABC')
- with self.assertRaises(TypeError):
- b > 3
- with self.assertRaises(TypeError):
- b > u'XYZ'
- with self.assertRaises(TypeError):
- b <= 3
- with self.assertRaises(TypeError):
- b >= int(3)
- with self.assertRaises(TypeError):
- b < 3.3
- with self.assertRaises(TypeError):
- b > (3.3 + 3j)
- with self.assertRaises(TypeError):
- b >= (1, 2)
- with self.assertRaises(TypeError):
- b <= [1, 2]
-
- def test_mul(self):
- b = bytes(b'ABC')
- c = b * 4
- self.assertTrue(isinstance(c, bytes))
- self.assertEqual(c, b'ABCABCABCABC')
- d = b * int(4)
- self.assertTrue(isinstance(d, bytes))
- self.assertEqual(d, b'ABCABCABCABC')
- if utils.PY2:
- e = b * long(4)
- self.assertTrue(isinstance(e, bytes))
- self.assertEqual(e, b'ABCABCABCABC')
-
- def test_rmul(self):
- b = bytes(b'XYZ')
- c = 3 * b
- self.assertTrue(isinstance(c, bytes))
- self.assertEqual(c, b'XYZXYZXYZ')
- d = b * int(3)
- self.assertTrue(isinstance(d, bytes))
- self.assertEqual(d, b'XYZXYZXYZ')
- if utils.PY2:
- e = long(3) * b
- self.assertTrue(isinstance(e, bytes))
- self.assertEqual(e, b'XYZXYZXYZ')
-
- def test_slice(self):
- b = bytes(b'ABCD')
- c1 = b[:]
- self.assertTrue(isinstance(c1, bytes))
- self.assertTrue(c1 == b)
- # The following is not true, whereas it is true normally on Py2 and
- # Py3. Does this matter?:
- # self.assertTrue(c1 is b)
-
- c2 = b[10:]
- self.assertTrue(isinstance(c2, bytes))
- self.assertTrue(c2 == bytes(b''))
- self.assertTrue(c2 == b'')
-
- c3 = b[:0]
- self.assertTrue(isinstance(c3, bytes))
- self.assertTrue(c3 == bytes(b''))
- self.assertTrue(c3 == b'')
-
- c4 = b[:1]
- self.assertTrue(isinstance(c4, bytes))
- self.assertTrue(c4 == bytes(b'A'))
- self.assertTrue(c4 == b'A')
-
- c5 = b[:-1]
- self.assertTrue(isinstance(c5, bytes))
- self.assertTrue(c5 == bytes(b'ABC'))
- self.assertTrue(c5 == b'ABC')
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/future/tests/test_futurize.py b/future/tests/test_futurize.py
deleted file mode 100644
index 19f86d7f..00000000
--- a/future/tests/test_futurize.py
+++ /dev/null
@@ -1,636 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import, division, print_function
-
-import pprint
-from subprocess import Popen, PIPE
-import os
-
-from future.tests.base import CodeHandler, unittest
-
-
-class TestFuturizeSimple(CodeHandler):
- """
- This class contains snippets of Python 2 code (invalid Python 3) and
- tests for whether they can be passed to ``futurize`` and immediately
- run under both Python 2 again and Python 3.
- """
-
- @unittest.expectedFailure
- def test_problematic_string(self):
- """ This string generates a SyntaxError on Python 3 unless it has
- an r prefix.
- """
- before = r"""
- s = 'The folder is "C:\Users"'.
- """
- after = r"""
- s = r'The folder is "C:\Users"'.
- """
- self.convert_check(before, after)
-
- def test_tobytes(self):
- """
- The --tobytes option converts all UNADORNED string literals 'abcd' to b'abcd'.
- It does apply to multi-line strings but doesn't apply if it's a raw
- string, because ur'abcd' is a SyntaxError on Python 2 and br'abcd' is a
- SyntaxError on Python 3.
- """
- before = r"""
- s0 = '1234'
- s1 = '''5678
- '''
- s2 = "9abc"
- # Unchanged:
- s3 = r'1234'
- s4 = R"defg"
- s5 = u'hijk'
- s6 = u"lmno"
- s7 = b'lmno'
- s8 = b"pqrs"
- """
- after = r"""
- s0 = b'1234'
- s1 = b'''5678
- '''
- s2 = b"9abc"
- # Unchanged:
- s3 = r'1234'
- s4 = R"defg"
- s5 = u'hijk'
- s6 = u"lmno"
- s7 = b'lmno'
- s8 = b"pqrs"
- """
- self.convert_check(before, after, tobytes=True)
-
- @unittest.expectedFailure
- def test_izip(self):
- before = """
- from itertools import izip
- for (a, b) in izip([1, 3, 5], [2, 4, 6]):
- pass
- """
- after = """
- from __future__ import unicode_literals
- from future.builtins import zip
- for (a, b) in zip([1, 3, 5], [2, 4, 6]):
- pass
- """
- self.convert_check(before, after, stages=(1, 2), ignore_imports=False)
-
- @unittest.expectedFailure
- def test_no_unneeded_list_calls(self):
- """
- TODO: get this working
- """
- code = """
- for (a, b) in zip(range(3), range(3, 6)):
- pass
- """
- self.unchanged(code)
-
- def test_import_builtins(self):
- before = """
- a = raw_input()
- b = open(a, b, c)
- c = filter(a, b)
- d = map(a, b)
- e = isinstance(a, str)
- f = bytes(a, encoding='utf-8')
- for g in xrange(10**10):
- pass
- super(MyClass, self)
- """
- after = """
- from __future__ import unicode_literals
- from future.builtins import bytes
- from future.builtins import filter
- from future.builtins import input
- from future.builtins import map
- from future.builtins import open
- from future.builtins import range
- from future.builtins import super
- a = input()
- b = open(a, b, c)
- c = list(filter(a, b))
- d = list(map(a, b))
- e = isinstance(a, str)
- f = bytes(a, encoding='utf-8')
- for g in range(10**10):
- pass
- super(MyClass, self)
- """
- self.convert_check(before, after, ignore_imports=False, run=False)
-
- def test_xrange(self):
- code = '''
- for i in xrange(10):
- pass
- '''
- self.convert(code)
-
- @unittest.expectedFailure
- def test_source_coding_utf8(self):
- """
- Tests to ensure that the source coding line is not corrupted or
- removed. It must be left as the first line in the file (including
- before any __future__ imports). Also tests whether the unicode
- characters in this encoding are parsed correctly and left alone.
- """
- code = """
- # -*- coding: utf-8 -*-
- icons = [u"◐", u"◓", u"◑", u"◒"]
- """
- self.unchanged(code)
-
- def test_exception_syntax(self):
- """
- Test of whether futurize handles the old-style exception syntax
- """
- before = """
- try:
- pass
- except IOError, e:
- val = e.errno
- """
- after = """
- try:
- pass
- except IOError as e:
- val = e.errno
- """
- self.convert_check(before, after)
-
- def test_super(self):
- """
- This tests whether futurize keeps the old two-argument super() calls the
- same as before. It should, because this still works in Py3.
- """
- code = '''
- class VerboseList(list):
- def append(self, item):
- print('Adding an item')
- super(VerboseList, self).append(item)
- '''
- self.unchanged(code)
-
- @unittest.expectedFailure
- def test_file(self):
- """
- file() as a synonym for open() is obsolete and invalid on Python 3.
- """
- before = '''
- f = file(__file__)
- data = f.read()
- f.close()
- '''
- after = '''
- f = open(__file__)
- data = f.read()
- f.close()
- '''
- self.convert_check(before, after)
-
- def test_apply(self):
- before = '''
- def addup(*x):
- return sum(x)
-
- assert apply(addup, (10,20)) == 30
- '''
- after = """
- def addup(*x):
- return sum(x)
-
- assert addup(*(10,20)) == 30
- """
- self.convert_check(before, after)
-
- @unittest.skip('not implemented yet')
- def test_download_pypi_package_and_test(self, package_name='future'):
- URL = 'http://pypi.python.org/pypi/{0}/json'
-
- import requests
- r = requests.get(URL.format(package_name))
- pprint.pprint(r.json())
-
- download_url = r.json()['urls'][0]['url']
- filename = r.json()['urls'][0]['filename']
- # r2 = requests.get(download_url)
- # with open('/tmp/' + filename, 'w') as tarball:
- # tarball.write(r2.content)
-
- def test_raw_input(self):
- """
- Passes in a string to the waiting input() after futurize
- conversion.
-
- The code is the first snippet from these docs:
- http://docs.python.org/2/library/2to3.html
- """
- before = """
- def greet(name):
- print "Hello, {0}!".format(name)
- print "What's your name?"
- name = raw_input()
- greet(name)
- """
- desired = """
- def greet(name):
- print("Hello, {0}!".format(name))
- print("What's your name?")
- name = input()
- greet(name)
- """
- self.convert_check(before, desired, run=False)
-
- for interpreter in self.interpreters:
- p1 = Popen([interpreter, self.tempdir + 'mytestscript.py'],
- stdout=PIPE, stdin=PIPE, stderr=PIPE, env=self.env)
- (stdout, stderr) = p1.communicate(b'Ed')
- self.assertEqual(stdout, b"What's your name?\nHello, Ed!\n")
-
- def test_literal_prefixes_are_not_stripped(self):
- """
- Tests to ensure that the u'' and b'' prefixes on unicode strings and
- byte strings are not removed by the futurize script. Removing the
- prefixes on Py3.3+ is unnecessary and loses some information -- namely,
- that the strings have explicitly been marked as unicode or bytes,
- rather than just e.g. a guess by some automated tool about what they
- are.
- """
- code = '''
- s = u'unicode string'
- b = b'byte string'
- '''
- self.unchanged(code)
-
- @unittest.expectedFailure
- def test_division(self):
- """
- TODO: implement this!
- """
- before = """
- x = 1 / 2
- """
- after = """
- from future.utils import old_div
- x = old_div(1, 2)
- """
- self.convert_check(before, after, stages=[1])
-
-
-class TestFuturizeRenamedStdlib(CodeHandler):
- def test_renamed_modules(self):
- before = """
- import ConfigParser
- import copy_reg
- import cPickle
- import cStringIO
-
- s = cStringIO.StringIO('blah')
- """
- after = """
- import configparser
- import copyreg
- import pickle
- import io
-
- s = io.StringIO('blah')
- """
- self.convert_check(before, after)
-
- @unittest.expectedFailure
- def test_urllib_refactor(self):
- # Code like this using urllib is refactored by futurize --stage2 to use
- # the new Py3 module names, but ``future`` doesn't support urllib yet.
- before = """
- import urllib
-
- URL = 'http://pypi.python.org/pypi/future/json'
- package_name = 'future'
- r = urllib.urlopen(URL.format(package_name))
- data = r.read()
- """
- after = """
- import urllib.request
-
- URL = 'http://pypi.python.org/pypi/future/json'
- package_name = 'future'
- r = urllib.request.urlopen(URL.format(package_name))
- data = r.read()
- """
- self.convert_check(before, after)
-
- def test_renamed_copy_reg_and_cPickle_modules(self):
- """
- Example from docs.python.org/2/library/copy_reg.html
- """
- before = """
- import copy_reg
- import copy
- import cPickle
- class C(object):
- def __init__(self, a):
- self.a = a
-
- def pickle_c(c):
- print('pickling a C instance...')
- return C, (c.a,)
-
- copy_reg.pickle(C, pickle_c)
- c = C(1)
- d = copy.copy(c)
- p = cPickle.dumps(c)
- """
- after = """
- import copyreg
- import copy
- import pickle
- class C(object):
- def __init__(self, a):
- self.a = a
-
- def pickle_c(c):
- print('pickling a C instance...')
- return C, (c.a,)
-
- copyreg.pickle(C, pickle_c)
- c = C(1)
- d = copy.copy(c)
- p = pickle.dumps(c)
- """
- self.convert_check(before, after)
-
- @unittest.expectedFailure
- def test_Py2_StringIO_module(self):
- """
- Ideally, there would be a fixer for this. For now:
-
- TODO: add the Py3 equivalent for this to the docs
- """
- before = """
- import cStringIO
- s = cStringIO.StringIO('my string')
- assert isinstance(s, cStringIO.InputType)
- """
- after = """
- import io
- s = io.StringIO('my string')
- # assert isinstance(s, io.InputType)
- # There is no io.InputType in Python 3. What should we change this to
- # instead?
- """
- self.convert_check(before, after)
-
-
-class TestFuturizeStage1(CodeHandler):
- """
- Tests "stage 1": safe optimizations: modernizing Python 2 code so that it
- uses print functions, new-style exception syntax, etc.
-
- The behaviour should not change and this should introduce no dependency on
- the ``future`` package. It produces more modern Python 2-only code. The
- goal is to reduce the size of the real porting patch-set by performing
- the uncontroversial patches first.
- """
-
- def test_apply(self):
- """
- apply() should be changed by futurize --stage1
- """
- before = '''
- def f(a, b):
- return a + b
-
- args = (1, 2)
- assert apply(f, args) == 3
- assert apply(f, ('a', 'b')) == 'ab'
- '''
- after = '''
- def f(a, b):
- return a + b
-
- args = (1, 2)
- assert f(*args) == 3
- assert f(*('a', 'b')) == 'ab'
- '''
- self.convert_check(before, after, stages=[1])
-
- def test_xrange(self):
- """
- xrange should not be changed by futurize --stage1
- """
- code = '''
- for i in xrange(10):
- pass
- '''
- self.unchanged(code, stages=[1])
-
- @unittest.expectedFailure
- def test_absolute_import_changes(self):
- """
- Implicit relative imports should be converted to absolute or explicit
- relative imports correctly.
-
- Issue #16 (with porting bokeh/bbmodel.py)
- """
- with open('specialmodels.py', 'w') as f:
- f.write('pass')
-
- before = """
- import specialmodels.pandasmodel
- specialmodels.pandasmodel.blah()
- """
- after = """
- from __future__ import absolute_import
- from .specialmodels import pandasmodel
- pandasmodel.blah()
- """
- self.convert_check(before, after, stages=[1])
-
- def test_safe_futurize_imports(self):
- """
- The standard library module names should not be changed until stage 2
- """
- before = """
- import ConfigParser
- import HTMLParser
- import collections
-
- ConfigParser.ConfigParser
- HTMLParser.HTMLParser
- d = collections.OrderedDict()
- """
- self.unchanged(before, stages=[1])
-
- def test_print(self):
- before = """
- print 'Hello'
- """
- after = """
- print('Hello')
- """
- self.convert_check(before, after, stages=[1])
-
- before = """
- import sys
- print >> sys.stderr, 'Hello', 'world'
- """
- after = """
- import sys
- print('Hello', 'world', file=sys.stderr)
- """
- self.convert_check(before, after, stages=[1])
-
- def test_print_already_function(self):
- """
- Running futurize --stage1 should not add a second set of parentheses
- """
- before = """
- print('Hello')
- """
- self.unchanged(before, stages=[1])
-
- @unittest.expectedFailure
- def test_print_already_function_complex(self):
- """
- Running futurize --stage1 does add a second second set of parentheses
- in this case. This is because the underlying lib2to3 has two distinct
- grammars -- with a print statement and with a print function -- and,
- when going forwards (2 to both), futurize assumes print is a statement,
- which raises a ParseError.
- """
- before = """
- import sys
- print('Hello', 'world', file=sys.stderr)
- """
- self.unchanged(before, stages=[1])
-
- def test_exceptions(self):
- before = """
- try:
- raise AttributeError('blah')
- except AttributeError, e:
- pass
- """
- after = """
- try:
- raise AttributeError('blah')
- except AttributeError as e:
- pass
- """
- self.convert_check(before, after, stages=[1])
-
- @unittest.expectedFailure
- def test_string_exceptions(self):
- """
- 2to3 does not convert string exceptions: see
- http://python3porting.com/differences.html.
- """
- before = """
- try:
- raise "old string exception"
- except Exception, e:
- pass
- """
- after = """
- try:
- raise Exception("old string exception")
- except Exception as e:
- pass
- """
- self.convert_check(before, after, stages=[1])
-
- @unittest.expectedFailure
- def test_oldstyle_classes(self):
- """
- We don't convert old-style classes to new-style automatically. Should we?
- """
- before = """
- class Blah:
- pass
- """
- after = """
- class Blah(object):
- pass
- """
- self.convert_check(before, after, stages=[1])
-
- @unittest.expectedFailure
- def test_all(self):
- """
- Standard library module names should not be changed in stage 1
- """
- before = """
- import ConfigParser
- import HTMLParser
- import collections
-
- print 'Hello'
- try:
- raise AttributeError('blah')
- except AttributeError, e:
- pass
- print 'Number is', 1 / 2
- """
- after = """
- from future.utils import old_div
- import Configparser
- import HTMLParser
- import collections
-
- print('Hello')
- try:
- raise AttributeError('blah')
- except AttributeError as e:
- pass
- print('Number is', old_div(1, 2))
- """
- self.convert_check(before, after, stages=[1])
-
- def test_octal_literals(self):
- before = """
- mode = 0644
- """
- after = """
- mode = 0o644
- """
- self.convert_check(before, after)
-
- def test_long_int_literals(self):
- before = """
- bignumber = 12345678901234567890L
- """
- after = """
- bignumber = 12345678901234567890
- """
- self.convert_check(before, after)
-
- def test___future___import_position(self):
- """
- Issue #4: __future__ imports inserted too low in file: SyntaxError
- """
- code = """
- # Comments here
- # and here
- __version__=''' $Id$ '''
- __doc__="A Sequencer class counts things. It aids numbering and formatting lists."
- __all__='Sequencer getSequencer setSequencer'.split()
- #
- # another comment
- #
-
- CONSTANTS = [ 0, 01, 011, 0111, 012, 02, 021, 0211, 02111, 013 ]
- _RN_LETTERS = "IVXLCDM"
-
- def my_func(value):
- pass
-
- ''' Docstring-like comment here '''
- """
- self.convert(code)
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/future/tests/test_futurize_from3.py b/future/tests/test_futurize_from3.py
deleted file mode 100644
index d7e9c29b..00000000
--- a/future/tests/test_futurize_from3.py
+++ /dev/null
@@ -1,155 +0,0 @@
-"""
-This module contains snippets of Python 3 code (invalid Python 2) and
-tests for whether they can be passed to ``futurize --from3`` and immediately
-run under both Python 2 and Python 3.
-"""
-
-from __future__ import print_function, absolute_import
-
-import pprint
-from subprocess import Popen, PIPE
-import tempfile
-import os
-
-from future.tests.base import CodeHandler, unittest
-
-
-class TestFuturizeFrom3(CodeHandler):
- def test_range_slice(self):
- """
- After running ``futurize --from3``, this Python 3 code should run on
- both Py3 and Py2 without a MemoryError
- """
- code = '''
- for i in range(10**15)[:10]:
- pass
- '''
- self.unchanged(code, from3=True)
-
- def test_print(self):
- """
- This Python 3-only code is a SyntaxError on Py2 without the
- print_function import from __future__.
- """
- code = '''
- import sys
- print('Hello', file=sys.stderr)
- '''
- self.unchanged(code, from3=True)
-
- def test_division(self):
- """
- True division should not be screwed up by conversion from 3 to both
- """
- code = '''
- x = 3 / 2
- assert x == 1.5
- '''
- self.unchanged(code, from3=True)
-
-
-class TestFuturizeAnnotations(CodeHandler):
- @unittest.expectedFailure
- def test_return_annotations_alone(self):
- before = "def foo() -> 'bar': pass"
- after = """
- def foo(): pass
- foo.__annotations__ = {'return': 'bar'}
- """
- self.check(before, after, from3=True)
-
- b = """
- def foo() -> "bar":
- print "baz"
- print "what's next, again?"
- """
- a = """
- def foo():
- print "baz"
- print "what's next, again?"
- """
- self.check(b, a, from3=True)
-
- @unittest.expectedFailure
- def test_single_param_annotations(self):
- b = "def foo(bar:'baz'): pass"
- a = """
- def foo(bar): pass
- foo.__annotations__ = {'bar': 'baz'}
- """
- self.check(b, a, from3=True)
-
- b = """
- def foo(bar:"baz"="spam"):
- print("what's next, again?")
- print("whatever.")
- """
- a = """
- def foo(bar="spam"):
- print("what's next, again?")
- print("whatever.")
- foo.__annotations__ = {'bar': 'baz'}
- """
- self.check(b, a, from3=True)
-
- @unittest.expectedFailure
- def test_multiple_param_annotations(self):
- b = "def foo(bar:'spam'=False, baz:'eggs'=True, ham:False='spaghetti'): pass"
- a = "def foo(bar=False, baz=True, ham='spaghetti'): pass"
- self.check(b, a, from3=True)
-
- b = """
- def foo(bar:"spam"=False, baz:"eggs"=True, ham:False="spam"):
- print("this is filler, just doing a suite")
- print("suites require multiple lines.")
- """
- a = """
- def foo(bar=False, baz=True, ham="spam"):
- print("this is filler, just doing a suite")
- print("suites require multiple lines.")
- """
- self.check(b, a, from3=True)
-
- @unittest.expectedFailure
- def test_mixed_annotations(self):
- b = "def foo(bar=False, baz:'eggs'=True, ham:False='spaghetti') -> 'zombies': pass"
- a = "def foo(bar=False, baz=True, ham='spaghetti'): pass"
- self.check(b, a, from3=True)
-
- b = """
- def foo(bar:"spam"=False, baz=True, ham:False="spam") -> 'air':
- print("this is filler, just doing a suite")
- print("suites require multiple lines.")
- """
- a = """
- def foo(bar=False, baz=True, ham="spam"):
- print("this is filler, just doing a suite")
- print("suites require multiple lines.")
- """
- self.check(b, a, from3=True)
-
- b = "def foo(bar) -> 'brains': pass"
- a = "def foo(bar): pass"
- self.check(b, a, from3=True)
-
- def test_functions_unchanged(self):
- s = "def foo(): pass"
- self.unchanged(s, from3=True)
-
- s = """
- def foo():
- pass
- pass
- """
- self.unchanged(s, from3=True)
-
- s = """
- def foo(bar='baz'):
- pass
- pass
- """
- self.unchanged(s, from3=True)
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/future/tests/test_httpservers.py b/future/tests/test_httpservers.py
deleted file mode 100644
index 83e2d965..00000000
--- a/future/tests/test_httpservers.py
+++ /dev/null
@@ -1,715 +0,0 @@
-# coding: utf-8
-
-"""Unittests for the various HTTPServer modules.
-
-From Python 3.3
-
-Written by Cody A.W. Somerville ,
-Josip Dzolonga, and Michael Otteneder for the 2007/08 GHOP contest.
-"""
-
-from __future__ import (absolute_import, division,
- print_function, unicode_literals)
-from future import standard_library
-from future.builtins import *
-
-from http.server import BaseHTTPRequestHandler, HTTPServer, \
- SimpleHTTPRequestHandler, CGIHTTPRequestHandler
-from http import server
-
-import os
-import sys
-import re
-import base64
-import shutil
-# Not ported yet:
-# import urllib.parse
-# Use this instead:
-import urllib
-import http.client
-import tempfile
-from io import BytesIO
-
-from test import support
-from future.tests.base import unittest
-
-threading = support.import_module('threading')
-
-
-class NoLogRequestHandler(object):
- def log_message(self, *args):
- # don't write log messages to stderr
- pass
-
- def read(self, n=None):
- return ''
-
-
-class TestServerThread(threading.Thread):
- def __init__(self, test_object, request_handler):
- threading.Thread.__init__(self)
- self.request_handler = request_handler
- self.test_object = test_object
-
- def run(self):
- self.server = HTTPServer(('localhost', 0), self.request_handler)
- self.test_object.HOST, self.test_object.PORT = self.server.socket.getsockname()
- self.test_object.server_started.set()
- self.test_object = None
- try:
- self.server.serve_forever(0.05)
- finally:
- self.server.server_close()
-
- def stop(self):
- self.server.shutdown()
-
-
-class BaseTestCase(unittest.TestCase):
- def setUp(self):
- self._threads = support.threading_setup()
- os.environ = support.EnvironmentVarGuard()
- self.server_started = threading.Event()
- self.thread = TestServerThread(self, self.request_handler)
- self.thread.start()
- self.server_started.wait()
-
- def tearDown(self):
- self.thread.stop()
- self.thread = None
- os.environ.__exit__()
- support.threading_cleanup(*self._threads)
-
- def request(self, uri, method='GET', body=None, headers={}):
- self.connection = http.client.HTTPConnection(self.HOST, self.PORT)
- self.connection.request(method, uri, body, headers)
- return self.connection.getresponse()
-
-
-class BaseHTTPServerTestCase(BaseTestCase):
- class request_handler(NoLogRequestHandler, BaseHTTPRequestHandler):
- protocol_version = 'HTTP/1.1'
- default_request_version = 'HTTP/1.1'
-
- def do_TEST(self):
- self.send_response(204)
- self.send_header('Content-Type', 'text/html')
- self.send_header('Connection', 'close')
- self.end_headers()
-
- def do_KEEP(self):
- self.send_response(204)
- self.send_header('Content-Type', 'text/html')
- self.send_header('Connection', 'keep-alive')
- self.end_headers()
-
- def do_KEYERROR(self):
- self.send_error(999)
-
- def do_CUSTOM(self):
- self.send_response(999)
- self.send_header('Content-Type', 'text/html')
- self.send_header('Connection', 'close')
- self.end_headers()
-
- def do_LATINONEHEADER(self):
- self.send_response(999)
- self.send_header('X-Special', 'Dängerous Mind')
- self.send_header('Connection', 'close')
- self.end_headers()
- body = self.headers['x-special-incoming'].encode('utf-8')
- self.wfile.write(body)
-
- def setUp(self):
- BaseTestCase.setUp(self)
- self.con = http.client.HTTPConnection(self.HOST, self.PORT)
- self.con.connect()
-
- def test_command(self):
- self.con.request('GET', '/')
- res = self.con.getresponse()
- self.assertEqual(res.status, 501)
-
- def test_request_line_trimming(self):
- self.con._http_vsn_str = 'HTTP/1.1\n'
- self.con.putrequest('GET', '/')
- self.con.endheaders()
- res = self.con.getresponse()
- self.assertEqual(res.status, 501)
-
- def test_version_bogus(self):
- self.con._http_vsn_str = 'FUBAR'
- self.con.putrequest('GET', '/')
- self.con.endheaders()
- res = self.con.getresponse()
- self.assertEqual(res.status, 400)
-
- def test_version_digits(self):
- self.con._http_vsn_str = 'HTTP/9.9.9'
- self.con.putrequest('GET', '/')
- self.con.endheaders()
- res = self.con.getresponse()
- self.assertEqual(res.status, 400)
-
- def test_version_none_get(self):
- self.con._http_vsn_str = ''
- self.con.putrequest('GET', '/')
- self.con.endheaders()
- res = self.con.getresponse()
- self.assertEqual(res.status, 501)
-
- def test_version_none(self):
- self.con._http_vsn_str = ''
- self.con.putrequest('PUT', '/')
- self.con.endheaders()
- res = self.con.getresponse()
- self.assertEqual(res.status, 400)
-
- def test_version_invalid(self):
- self.con._http_vsn = 99
- self.con._http_vsn_str = 'HTTP/9.9'
- self.con.putrequest('GET', '/')
- self.con.endheaders()
- res = self.con.getresponse()
- self.assertEqual(res.status, 505)
-
- def test_send_blank(self):
- self.con._http_vsn_str = ''
- self.con.putrequest('', '')
- self.con.endheaders()
- res = self.con.getresponse()
- self.assertEqual(res.status, 400)
-
- def test_header_close(self):
- self.con.putrequest('GET', '/')
- self.con.putheader('Connection', 'close')
- self.con.endheaders()
- res = self.con.getresponse()
- self.assertEqual(res.status, 501)
-
- def test_head_keep_alive(self):
- self.con._http_vsn_str = 'HTTP/1.1'
- self.con.putrequest('GET', '/')
- self.con.putheader('Connection', 'keep-alive')
- self.con.endheaders()
- res = self.con.getresponse()
- self.assertEqual(res.status, 501)
-
- def test_handler(self):
- self.con.request('TEST', '/')
- res = self.con.getresponse()
- self.assertEqual(res.status, 204)
-
- def test_return_header_keep_alive(self):
- self.con.request('KEEP', '/')
- res = self.con.getresponse()
- self.assertEqual(res.getheader('Connection'), 'keep-alive')
- self.con.request('TEST', '/')
- self.addCleanup(self.con.close)
-
- def test_internal_key_error(self):
- self.con.request('KEYERROR', '/')
- res = self.con.getresponse()
- self.assertEqual(res.status, 999)
-
- def test_return_custom_status(self):
- self.con.request('CUSTOM', '/')
- res = self.con.getresponse()
- self.assertEqual(res.status, 999)
-
- @unittest.skip('Unicode bug in Py2.7 email.parser.parsestr ?')
- def test_latin1_header(self):
- self.con.request('LATINONEHEADER', '/', headers={
- 'X-Special-Incoming': 'Ärger mit Unicode'
- })
- res = self.con.getresponse()
- self.assertEqual(res.getheader('X-Special'), 'Dängerous Mind')
- self.assertEqual(res.read(), 'Ärger mit Unicode'.encode('utf-8'))
-
-
-class SimpleHTTPServerTestCase(BaseTestCase):
- class request_handler(NoLogRequestHandler, SimpleHTTPRequestHandler):
- pass
-
- def setUp(self):
- BaseTestCase.setUp(self)
- self.cwd = os.getcwd()
- basetempdir = tempfile.gettempdir()
- os.chdir(basetempdir)
- self.data = bytes(b'We are the knights who say Ni!')
- self.tempdir = tempfile.mkdtemp(dir=basetempdir)
- self.tempdir_name = os.path.basename(self.tempdir)
- with open(os.path.join(self.tempdir, 'test'), 'wb') as temp:
- temp.write(self.data)
-
- def tearDown(self):
- try:
- os.chdir(self.cwd)
- try:
- shutil.rmtree(self.tempdir)
- except:
- pass
- finally:
- BaseTestCase.tearDown(self)
-
- def check_status_and_reason(self, response, status, data=None):
- body = response.read()
- self.assertTrue(response)
- self.assertEqual(response.status, status)
- self.assertIsNotNone(response.reason)
- if data:
- self.assertEqual(data, body)
-
- def test_get(self):
- #constructs the path relative to the root directory of the HTTPServer
- response = self.request(self.tempdir_name + '/test')
- self.check_status_and_reason(response, 200, data=self.data)
- response = self.request(self.tempdir_name + '/')
- self.check_status_and_reason(response, 200)
- response = self.request(self.tempdir_name)
- self.check_status_and_reason(response, 301)
- response = self.request('/ThisDoesNotExist')
- self.check_status_and_reason(response, 404)
- response = self.request('/' + 'ThisDoesNotExist' + '/')
- self.check_status_and_reason(response, 404)
- with open(os.path.join(self.tempdir_name, 'index.html'), 'w') as f:
- response = self.request('/' + self.tempdir_name + '/')
- self.check_status_and_reason(response, 200)
- # chmod() doesn't work as expected on Windows, and filesystem
- # permissions are ignored by root on Unix.
- if os.name == 'posix' and os.geteuid() != 0:
- os.chmod(self.tempdir, 0)
- response = self.request(self.tempdir_name + '/')
- self.check_status_and_reason(response, 404)
- os.chmod(self.tempdir, 0o755)
-
- def test_head(self):
- response = self.request(
- self.tempdir_name + '/test', method='HEAD')
- self.check_status_and_reason(response, 200)
- self.assertEqual(response.getheader('content-length'),
- str(len(self.data)))
- self.assertEqual(response.getheader('content-type'),
- 'application/octet-stream')
-
- def test_invalid_requests(self):
- response = self.request('/', method='FOO')
- self.check_status_and_reason(response, 501)
- # requests must be case sensitive,so this should fail too
- response = self.request('/', method='get')
- self.check_status_and_reason(response, 501)
- response = self.request('/', method='GETs')
- self.check_status_and_reason(response, 501)
-
-
-cgi_file1 = """\
-#!%s
-
-print("Content-type: text/html")
-print()
-print("Hello World")
-"""
-
-cgi_file2 = """\
-#!%s
-import cgi
-
-print("Content-type: text/html")
-print()
-
-form = cgi.FieldStorage()
-print("%%s, %%s, %%s" %% (form.getfirst("spam"), form.getfirst("eggs"),
- form.getfirst("bacon")))
-"""
-
-
-@unittest.skipIf(hasattr(os, 'geteuid') and os.geteuid() == 0,
- "This test can't be run reliably as root (issue #13308).")
-class CGIHTTPServerTestCase(BaseTestCase):
- class request_handler(NoLogRequestHandler, CGIHTTPRequestHandler):
- pass
-
- linesep = os.linesep.encode('ascii')
-
- def setUp(self):
- BaseTestCase.setUp(self)
- self.cwd = os.getcwd()
- self.parent_dir = tempfile.mkdtemp()
- self.cgi_dir = os.path.join(self.parent_dir, 'cgi-bin')
- os.mkdir(self.cgi_dir)
- self.file1_path = None
- self.file2_path = None
-
- # The shebang line should be pure ASCII: use symlink if possible.
- # See issue #7668.
- if support.can_symlink():
- self.pythonexe = os.path.join(self.parent_dir, 'python')
- os.symlink(sys.executable, self.pythonexe)
- else:
- self.pythonexe = sys.executable
-
- try:
- # The python executable path is written as the first line of the
- # CGI Python script. The encoding cookie cannot be used, and so the
- # path should be encodable to the default script encoding (utf-8)
- self.pythonexe.encode('utf-8')
- except UnicodeEncodeError:
- self.tearDown()
- self.skipTest("Python executable path is not encodable to utf-8")
-
- self.file1_path = os.path.join(self.cgi_dir, 'file1.py')
- with open(self.file1_path, 'w', encoding='utf-8') as file1:
- file1.write(cgi_file1 % self.pythonexe)
- os.chmod(self.file1_path, 0o777)
-
- self.file2_path = os.path.join(self.cgi_dir, 'file2.py')
- with open(self.file2_path, 'w', encoding='utf-8') as file2:
- file2.write(cgi_file2 % self.pythonexe)
- os.chmod(self.file2_path, 0o777)
-
- os.chdir(self.parent_dir)
-
- def tearDown(self):
- try:
- os.chdir(self.cwd)
- if self.pythonexe != sys.executable:
- os.remove(self.pythonexe)
- if self.file1_path:
- os.remove(self.file1_path)
- if self.file2_path:
- os.remove(self.file2_path)
- os.rmdir(self.cgi_dir)
- os.rmdir(self.parent_dir)
- finally:
- BaseTestCase.tearDown(self)
-
- def test_url_collapse_path(self):
- # verify tail is the last portion and head is the rest on proper urls
- test_vectors = {
- '': '//',
- '..': IndexError,
- '/.//..': IndexError,
- '/': '//',
- '//': '//',
- '/\\': '//\\',
- '/.//': '//',
- 'cgi-bin/file1.py': '/cgi-bin/file1.py',
- '/cgi-bin/file1.py': '/cgi-bin/file1.py',
- 'a': '//a',
- '/a': '//a',
- '//a': '//a',
- './a': '//a',
- './C:/': '/C:/',
- '/a/b': '/a/b',
- '/a/b/': '/a/b/',
- '/a/b/.': '/a/b/',
- '/a/b/c/..': '/a/b/',
- '/a/b/c/../d': '/a/b/d',
- '/a/b/c/../d/e/../f': '/a/b/d/f',
- '/a/b/c/../d/e/../../f': '/a/b/f',
- '/a/b/c/../d/e/.././././..//f': '/a/b/f',
- '../a/b/c/../d/e/.././././..//f': IndexError,
- '/a/b/c/../d/e/../../../f': '/a/f',
- '/a/b/c/../d/e/../../../../f': '//f',
- '/a/b/c/../d/e/../../../../../f': IndexError,
- '/a/b/c/../d/e/../../../../f/..': '//',
- '/a/b/c/../d/e/../../../../f/../.': '//',
- }
- for path, expected in test_vectors.items():
- if isinstance(expected, type) and issubclass(expected, Exception):
- self.assertRaises(expected,
- server._url_collapse_path, path)
- else:
- actual = server._url_collapse_path(path)
- self.assertEqual(expected, actual,
- msg='path = %r\nGot: %r\nWanted: %r' %
- (path, actual, expected))
-
- @unittest.expectedFailure
- def test_headers_and_content(self):
- res = self.request('/cgi-bin/file1.py')
- self.assertEqual((b'Hello World' + self.linesep, 'text/html', 200),
- (res.read(), res.getheader('Content-type'), res.status))
-
- @unittest.expectedFailure
- def test_post(self):
- # Was: params = urllib.parse.urlencode(
- params = urllib.urlencode(
- {'spam' : 1, 'eggs' : 'python', 'bacon' : 123456})
- headers = {'Content-type' : 'application/x-www-form-urlencoded'}
- res = self.request('/cgi-bin/file2.py', 'POST', params, headers)
-
- self.assertEqual(res.read(), b'1, python, 123456' + self.linesep)
-
- def test_invaliduri(self):
- res = self.request('/cgi-bin/invalid')
- res.read()
- self.assertEqual(res.status, 404)
-
- @unittest.expectedFailure
- def test_authorization(self):
- headers = {bytes(b'Authorization') : bytes(b'Basic ') +
- base64.b64encode(bytes(b'username:pass'))}
- res = self.request('/cgi-bin/file1.py', 'GET', headers=headers)
- self.assertEqual((b'Hello World' + self.linesep, 'text/html', 200),
- (res.read(), res.getheader('Content-type'), res.status))
-
- @unittest.expectedFailure
- def test_no_leading_slash(self):
- # http://bugs.python.org/issue2254
- res = self.request('cgi-bin/file1.py')
- self.assertEqual((b'Hello World' + self.linesep, 'text/html', 200),
- (res.read(), res.getheader('Content-type'), res.status))
-
- @unittest.expectedFailure
- def test_os_environ_is_not_altered(self):
- signature = "Test CGI Server"
- os.environ['SERVER_SOFTWARE'] = signature
- res = self.request('/cgi-bin/file1.py')
- self.assertEqual((b'Hello World' + self.linesep, 'text/html', 200),
- (res.read(), res.getheader('Content-type'), res.status))
- self.assertEqual(os.environ['SERVER_SOFTWARE'], signature)
-
-
-class SocketlessRequestHandler(SimpleHTTPRequestHandler, object):
- def __init__(self):
- self.get_called = False
- self.protocol_version = "HTTP/1.1"
-
- def do_GET(self):
- self.get_called = True
- self.send_response(200)
- self.send_header('Content-Type', 'text/html')
- self.end_headers()
- self.wfile.write(bytes(b'Data\r\n'))
-
- def log_message(self, format, *args):
- pass
-
-
-class RejectingSocketlessRequestHandler(SocketlessRequestHandler):
- def handle_expect_100(self):
- self.send_error(417)
- return False
-
-
-class AuditableBytesIO(object):
-
- def __init__(self):
- self.datas = []
-
- def write(self, data):
- self.datas.append(data)
-
- def getData(self):
- return bytes(b'').join(self.datas)
-
- @property
- def numWrites(self):
- return len(self.datas)
-
-
-class BaseHTTPRequestHandlerTestCase(unittest.TestCase):
- """Test the functionality of the BaseHTTPServer.
-
- Test the support for the Expect 100-continue header.
- """
-
- HTTPResponseMatch = re.compile(b'HTTP/1.[0-9]+ 200 OK')
-
- def setUp (self):
- self.handler = SocketlessRequestHandler()
-
- def send_typical_request(self, message):
- input = BytesIO(message)
- output = BytesIO()
- self.handler.rfile = input
- self.handler.wfile = output
- self.handler.handle_one_request()
- output.seek(0)
- return output.readlines()
-
- def verify_get_called(self):
- self.assertTrue(self.handler.get_called)
-
- def verify_expected_headers(self, headers):
- for fieldName in b'Server: ', b'Date: ', b'Content-Type: ':
- self.assertEqual(sum(h.startswith(fieldName) for h in headers), 1)
-
- def verify_http_server_response(self, response):
- match = self.HTTPResponseMatch.search(response)
- self.assertTrue(match is not None)
-
- def test_http_1_1(self):
- result = self.send_typical_request(bytes(b'GET / HTTP/1.1\r\n\r\n'))
- self.verify_http_server_response(result[0])
- self.verify_expected_headers(result[1:-1])
- self.verify_get_called()
- self.assertEqual(result[-1], b'Data\r\n')
-
- def test_http_1_0(self):
- result = self.send_typical_request(bytes(b'GET / HTTP/1.0\r\n\r\n'))
- self.verify_http_server_response(result[0])
- self.verify_expected_headers(result[1:-1])
- self.verify_get_called()
- self.assertEqual(result[-1], b'Data\r\n')
-
- def test_http_0_9(self):
- result = self.send_typical_request(bytes(b'GET / HTTP/0.9\r\n\r\n'))
- self.assertEqual(len(result), 1)
- self.assertEqual(result[0], b'Data\r\n')
- self.verify_get_called()
-
- def test_with_continue_1_0(self):
- result = self.send_typical_request(bytes(b'GET / HTTP/1.0\r\nExpect: 100-continue\r\n\r\n'))
- self.verify_http_server_response(result[0])
- self.verify_expected_headers(result[1:-1])
- self.verify_get_called()
- self.assertEqual(result[-1], b'Data\r\n')
-
- def test_with_continue_1_1(self):
- result = self.send_typical_request(bytes(b'GET / HTTP/1.1\r\nExpect: 100-continue\r\n\r\n'))
- self.assertEqual(result[0], b'HTTP/1.1 100 Continue\r\n')
- self.assertEqual(result[1], b'HTTP/1.1 200 OK\r\n')
- self.verify_expected_headers(result[2:-1])
- self.verify_get_called()
- self.assertEqual(result[-1], b'Data\r\n')
-
- def test_header_buffering_of_send_error(self):
-
- input = BytesIO(bytes(b'GET / HTTP/1.1\r\n\r\n'))
- output = AuditableBytesIO()
- handler = SocketlessRequestHandler()
- handler.rfile = input
- handler.wfile = output
- handler.request_version = 'HTTP/1.1'
- handler.requestline = ''
- handler.command = None
-
- handler.send_error(418)
- self.assertEqual(output.numWrites, 2)
-
- def test_header_buffering_of_send_response_only(self):
-
- input = BytesIO(bytes(b'GET / HTTP/1.1\r\n\r\n'))
- output = AuditableBytesIO()
- handler = SocketlessRequestHandler()
- handler.rfile = input
- handler.wfile = output
- handler.request_version = 'HTTP/1.1'
-
- handler.send_response_only(418)
- self.assertEqual(output.numWrites, 0)
- handler.end_headers()
- self.assertEqual(output.numWrites, 1)
-
- def test_header_buffering_of_send_header(self):
-
- input = BytesIO(bytes(b'GET / HTTP/1.1\r\n\r\n'))
- output = AuditableBytesIO()
- handler = SocketlessRequestHandler()
- handler.rfile = input
- handler.wfile = output
- handler.request_version = 'HTTP/1.1'
-
- handler.send_header('Foo', 'foo')
- handler.send_header('bar', 'bar')
- self.assertEqual(output.numWrites, 0)
- handler.end_headers()
- self.assertEqual(output.getData(), b'Foo: foo\r\nbar: bar\r\n\r\n')
- self.assertEqual(output.numWrites, 1)
-
- def test_header_unbuffered_when_continue(self):
-
- def _readAndReseek(f):
- pos = f.tell()
- f.seek(0)
- data = f.read()
- f.seek(pos)
- return data
-
- input = BytesIO(bytes(b'GET / HTTP/1.1\r\nExpect: 100-continue\r\n\r\n'))
- output = BytesIO()
- self.handler.rfile = input
- self.handler.wfile = output
- self.handler.request_version = 'HTTP/1.1'
-
- self.handler.handle_one_request()
- self.assertNotEqual(_readAndReseek(output), b'')
- result = _readAndReseek(output).split(bytes(b'\r\n'))
- self.assertEqual(result[0], b'HTTP/1.1 100 Continue')
- self.assertEqual(result[1], b'HTTP/1.1 200 OK')
-
- def test_with_continue_rejected(self):
- usual_handler = self.handler # Save to avoid breaking any subsequent tests.
- self.handler = RejectingSocketlessRequestHandler()
- result = self.send_typical_request(bytes(b'GET / HTTP/1.1\r\nExpect: 100-continue\r\n\r\n'))
- self.assertEqual(result[0], b'HTTP/1.1 417 Expectation Failed\r\n')
- self.verify_expected_headers(result[1:-1])
- # The expect handler should short circuit the usual get method by
- # returning false here, so get_called should be false
- self.assertFalse(self.handler.get_called)
- self.assertEqual(sum(r == b'Connection: close\r\n' for r in result[1:-1]), 1)
- self.handler = usual_handler # Restore to avoid breaking any subsequent tests.
-
- def test_request_length(self):
- # Issue #10714: huge request lines are discarded, to avoid Denial
- # of Service attacks.
- result = self.send_typical_request(bytes(b'GET ') + bytes(b'x') * 65537)
- self.assertEqual(result[0], b'HTTP/1.1 414 Request-URI Too Long\r\n')
- self.assertFalse(self.handler.get_called)
-
- def test_header_length(self):
- # Issue #6791: same for headers
- result = self.send_typical_request(
- bytes(b'GET / HTTP/1.1\r\nX-Foo: bar') + bytes(b'r') * 65537 + bytes(b'\r\n\r\n'))
- self.assertEqual(result[0], b'HTTP/1.1 400 Line too long\r\n')
- self.assertFalse(self.handler.get_called)
-
-
-class SimpleHTTPRequestHandlerTestCase(unittest.TestCase):
- """ Test url parsing """
- def setUp(self):
- self.translated = os.getcwd()
- self.translated = os.path.join(self.translated, 'filename')
- self.handler = SocketlessRequestHandler()
-
- def test_query_arguments(self):
- path = self.handler.translate_path('/filename')
- self.assertEqual(path, self.translated)
- path = self.handler.translate_path('/filename?foo=bar')
- self.assertEqual(path, self.translated)
- path = self.handler.translate_path('/filename?a=b&spam=eggs#zot')
- self.assertEqual(path, self.translated)
-
- def test_start_with_double_slash(self):
- path = self.handler.translate_path('//filename')
- self.assertEqual(path, self.translated)
- path = self.handler.translate_path('//filename?foo=bar')
- self.assertEqual(path, self.translated)
-
-
-class DummyTest(unittest.TestCase):
- """
- It might help on travis-ci to have at least one test being executed
- for this module.
- """
- def test_nothing(self):
- self.assertTrue(True)
-
-
-def test_main(verbose=None):
- cwd = os.getcwd()
- try:
- support.run_unittest(
- BaseHTTPRequestHandlerTestCase,
- BaseHTTPServerTestCase,
- SimpleHTTPServerTestCase,
- CGIHTTPServerTestCase,
- SimpleHTTPRequestHandlerTestCase,
- )
- finally:
- os.chdir(cwd)
-
-if __name__ == '__main__':
- test_main()
diff --git a/future/tests/test_int.py b/future/tests/test_int.py
deleted file mode 100644
index 4d9fe587..00000000
--- a/future/tests/test_int.py
+++ /dev/null
@@ -1,420 +0,0 @@
-"""
-int tests from Py3.3
-"""
-
-from __future__ import (absolute_import, division,
- print_function, unicode_literals)
-from future import standard_library, utils
-from future.builtins import *
-from future.tests.base import unittest
-
-import sys
-import random
-from test import support
-
-
-L = [
- ('0', 0),
- ('1', 1),
- ('9', 9),
- ('10', 10),
- ('99', 99),
- ('100', 100),
- ('314', 314),
- (' 314', 314),
- ('314 ', 314),
- (' \t\t 314 \t\t ', 314),
- (repr(sys.maxsize), sys.maxsize),
- (' 1x', ValueError),
- (' 1 ', 1),
- (' 1\02 ', ValueError),
- ('', ValueError),
- (' ', ValueError),
- (' \t\t ', ValueError),
- ("\u0200", ValueError)
-]
-
-class IntTestCases(unittest.TestCase):
-
- def test_basic(self):
- self.assertEqual(int(314), 314)
- self.assertEqual(int(3.14), 3)
- # Check that conversion from float truncates towards zero
- self.assertEqual(int(-3.14), -3)
- self.assertEqual(int(3.9), 3)
- self.assertEqual(int(-3.9), -3)
- self.assertEqual(int(3.5), 3)
- self.assertEqual(int(-3.5), -3)
- self.assertEqual(int("-3"), -3)
- self.assertEqual(int(" -3 "), -3)
- self.assertEqual(int("\N{EM SPACE}-3\N{EN SPACE}"), -3)
- # Different base:
- self.assertEqual(int("10",16), 16)
- # Test conversion from strings and various anomalies
- for s, v in L:
- for sign in "", "+", "-":
- for prefix in "", " ", "\t", " \t\t ":
- ss = prefix + sign + s
- vv = v
- if sign == "-" and v is not ValueError:
- vv = -v
- try:
- self.assertEqual(int(ss), vv)
- except ValueError:
- pass
-
- s = repr(-1-sys.maxsize)
- x = int(s)
- self.assertEqual(x+1, -sys.maxsize)
- self.assertIsInstance(x, int)
- # should return int
- self.assertEqual(int(s[1:]), sys.maxsize+1)
-
- # should return int
- x = int(1e100)
- self.assertIsInstance(x, int)
- x = int(-1e100)
- self.assertIsInstance(x, int)
-
-
- # SF bug 434186: 0x80000000/2 != 0x80000000>>1.
- # Worked by accident in Windows release build, but failed in debug build.
- # Failed in all Linux builds.
- x = -1-sys.maxsize
- self.assertEqual(x >> 1, x//2)
-
- self.assertRaises(ValueError, int, '123\0')
- self.assertRaises(ValueError, int, '53', 40)
-
- # SF bug 1545497: embedded NULs were not detected with
- # explicit base
- self.assertRaises(ValueError, int, '123\0', 10)
- self.assertRaises(ValueError, int, '123\x00 245', 20)
-
- x = int('1' * 600)
- self.assertIsInstance(x, int)
-
-
- self.assertRaises(TypeError, int, 1, 12)
-
- self.assertEqual(int('0o123', 0), 83)
- self.assertEqual(int('0x123', 16), 291)
-
- # Bug 1679: "0x" is not a valid hex literal
- self.assertRaises(ValueError, int, "0x", 16)
- self.assertRaises(ValueError, int, "0x", 0)
-
- self.assertRaises(ValueError, int, "0o", 8)
- self.assertRaises(ValueError, int, "0o", 0)
-
- self.assertRaises(ValueError, int, "0b", 2)
- self.assertRaises(ValueError, int, "0b", 0)
-
- # SF bug 1334662: int(string, base) wrong answers
- # Various representations of 2**32 evaluated to 0
- # rather than 2**32 in previous versions
-
- self.assertEqual(int('100000000000000000000000000000000', 2), 4294967296)
- self.assertEqual(int('102002022201221111211', 3), 4294967296)
- self.assertEqual(int('10000000000000000', 4), 4294967296)
- self.assertEqual(int('32244002423141', 5), 4294967296)
- self.assertEqual(int('1550104015504', 6), 4294967296)
- self.assertEqual(int('211301422354', 7), 4294967296)
- self.assertEqual(int('40000000000', 8), 4294967296)
- self.assertEqual(int('12068657454', 9), 4294967296)
- self.assertEqual(int('4294967296', 10), 4294967296)
- self.assertEqual(int('1904440554', 11), 4294967296)
- self.assertEqual(int('9ba461594', 12), 4294967296)
- self.assertEqual(int('535a79889', 13), 4294967296)
- self.assertEqual(int('2ca5b7464', 14), 4294967296)
- self.assertEqual(int('1a20dcd81', 15), 4294967296)
- self.assertEqual(int('100000000', 16), 4294967296)
- self.assertEqual(int('a7ffda91', 17), 4294967296)
- self.assertEqual(int('704he7g4', 18), 4294967296)
- self.assertEqual(int('4f5aff66', 19), 4294967296)
- self.assertEqual(int('3723ai4g', 20), 4294967296)
- self.assertEqual(int('281d55i4', 21), 4294967296)
- self.assertEqual(int('1fj8b184', 22), 4294967296)
- self.assertEqual(int('1606k7ic', 23), 4294967296)
- self.assertEqual(int('mb994ag', 24), 4294967296)
- self.assertEqual(int('hek2mgl', 25), 4294967296)
- self.assertEqual(int('dnchbnm', 26), 4294967296)
- self.assertEqual(int('b28jpdm', 27), 4294967296)
- self.assertEqual(int('8pfgih4', 28), 4294967296)
- self.assertEqual(int('76beigg', 29), 4294967296)
- self.assertEqual(int('5qmcpqg', 30), 4294967296)
- self.assertEqual(int('4q0jto4', 31), 4294967296)
- self.assertEqual(int('4000000', 32), 4294967296)
- self.assertEqual(int('3aokq94', 33), 4294967296)
- self.assertEqual(int('2qhxjli', 34), 4294967296)
- self.assertEqual(int('2br45qb', 35), 4294967296)
- self.assertEqual(int('1z141z4', 36), 4294967296)
-
- # tests with base 0
- # this fails on 3.0, but in 2.x the old octal syntax is allowed
- self.assertEqual(int(' 0o123 ', 0), 83)
- self.assertEqual(int(' 0o123 ', 0), 83)
- self.assertEqual(int('000', 0), 0)
- self.assertEqual(int('0o123', 0), 83)
- self.assertEqual(int('0x123', 0), 291)
- self.assertEqual(int('0b100', 0), 4)
- self.assertEqual(int(' 0O123 ', 0), 83)
- self.assertEqual(int(' 0X123 ', 0), 291)
- self.assertEqual(int(' 0B100 ', 0), 4)
-
- # without base still base 10
- self.assertEqual(int('0123'), 123)
- self.assertEqual(int('0123', 10), 123)
-
- # tests with prefix and base != 0
- self.assertEqual(int('0x123', 16), 291)
- self.assertEqual(int('0o123', 8), 83)
- self.assertEqual(int('0b100', 2), 4)
- self.assertEqual(int('0X123', 16), 291)
- self.assertEqual(int('0O123', 8), 83)
- self.assertEqual(int('0B100', 2), 4)
-
- # the code has special checks for the first character after the
- # type prefix
- self.assertRaises(ValueError, int, '0b2', 2)
- self.assertRaises(ValueError, int, '0b02', 2)
- self.assertRaises(ValueError, int, '0B2', 2)
- self.assertRaises(ValueError, int, '0B02', 2)
- self.assertRaises(ValueError, int, '0o8', 8)
- self.assertRaises(ValueError, int, '0o08', 8)
- self.assertRaises(ValueError, int, '0O8', 8)
- self.assertRaises(ValueError, int, '0O08', 8)
- self.assertRaises(ValueError, int, '0xg', 16)
- self.assertRaises(ValueError, int, '0x0g', 16)
- self.assertRaises(ValueError, int, '0Xg', 16)
- self.assertRaises(ValueError, int, '0X0g', 16)
-
- # SF bug 1334662: int(string, base) wrong answers
- # Checks for proper evaluation of 2**32 + 1
- self.assertEqual(int('100000000000000000000000000000001', 2), 4294967297)
- self.assertEqual(int('102002022201221111212', 3), 4294967297)
- self.assertEqual(int('10000000000000001', 4), 4294967297)
- self.assertEqual(int('32244002423142', 5), 4294967297)
- self.assertEqual(int('1550104015505', 6), 4294967297)
- self.assertEqual(int('211301422355', 7), 4294967297)
- self.assertEqual(int('40000000001', 8), 4294967297)
- self.assertEqual(int('12068657455', 9), 4294967297)
- self.assertEqual(int('4294967297', 10), 4294967297)
- self.assertEqual(int('1904440555', 11), 4294967297)
- self.assertEqual(int('9ba461595', 12), 4294967297)
- self.assertEqual(int('535a7988a', 13), 4294967297)
- self.assertEqual(int('2ca5b7465', 14), 4294967297)
- self.assertEqual(int('1a20dcd82', 15), 4294967297)
- self.assertEqual(int('100000001', 16), 4294967297)
- self.assertEqual(int('a7ffda92', 17), 4294967297)
- self.assertEqual(int('704he7g5', 18), 4294967297)
- self.assertEqual(int('4f5aff67', 19), 4294967297)
- self.assertEqual(int('3723ai4h', 20), 4294967297)
- self.assertEqual(int('281d55i5', 21), 4294967297)
- self.assertEqual(int('1fj8b185', 22), 4294967297)
- self.assertEqual(int('1606k7id', 23), 4294967297)
- self.assertEqual(int('mb994ah', 24), 4294967297)
- self.assertEqual(int('hek2mgm', 25), 4294967297)
- self.assertEqual(int('dnchbnn', 26), 4294967297)
- self.assertEqual(int('b28jpdn', 27), 4294967297)
- self.assertEqual(int('8pfgih5', 28), 4294967297)
- self.assertEqual(int('76beigh', 29), 4294967297)
- self.assertEqual(int('5qmcpqh', 30), 4294967297)
- self.assertEqual(int('4q0jto5', 31), 4294967297)
- self.assertEqual(int('4000001', 32), 4294967297)
- self.assertEqual(int('3aokq95', 33), 4294967297)
- self.assertEqual(int('2qhxjlj', 34), 4294967297)
- self.assertEqual(int('2br45qc', 35), 4294967297)
- self.assertEqual(int('1z141z5', 36), 4294967297)
-
- @unittest.expectedFailure # fails on Py2
- @support.cpython_only
- def test_small_ints(self):
- # Bug #3236: Return small longs from PyLong_FromString
- self.assertIs(int('10'), 10)
- self.assertIs(int('-1'), -1)
- self.assertIs(int(b'10'), 10)
- self.assertIs(int(b'-1'), -1)
-
- def test_no_args(self):
- self.assertEqual(int(), 0)
-
- def test_keyword_args(self):
- # Test invoking int() using keyword arguments.
- self.assertEqual(int(x=1.2), 1)
- self.assertEqual(int('100', base=2), 4)
- self.assertEqual(int(x='100', base=2), 4)
-
- @unittest.expectedFailure
- def test_keyword_args_2(self):
- # newint causes these to fail:
- self.assertRaises(TypeError, int, base=10)
- self.assertRaises(TypeError, int, base=0)
-
- def test_non_numeric_input_types(self):
- # Test possible non-numeric types for the argument x, including
- # subclasses of the explicitly documented accepted types.
- class CustomStr(str): pass
- class CustomBytes(bytes): pass
- class CustomByteArray(bytearray): pass
-
- values = [b'100',
- bytearray(b'100'),
- CustomStr('100'),
- CustomBytes(b'100'),
- CustomByteArray(b'100')]
-
- for x in values:
- msg = 'x has type %s' % type(x).__name__
- self.assertEqual(int(x), 100, msg=msg)
- self.assertEqual(int(x, 2), 4, msg=msg)
-
- def test_string_float(self):
- self.assertRaises(ValueError, int, '1.2')
-
- def test_intconversion(self):
- # Test __int__()
- class ClassicMissingMethods:
- pass
- # The following raises an AttributeError (for '__trunc__') on Py2
- # but a TypeError on Py3 (which uses new-style classes).
- # Perhaps nothing is to be done but avoiding old-style classes!
- # ...
- # self.assertRaises(TypeError, int, ClassicMissingMethods())
-
- class MissingMethods(object):
- pass
- self.assertRaises(TypeError, int, MissingMethods())
-
- class Foo0:
- def __int__(self):
- return 42
-
- class Foo1(object):
- def __int__(self):
- return 42
-
- class Foo2(int):
- def __int__(self):
- return 42
-
- class Foo3(int):
- def __int__(self):
- return self
-
- class Foo4(int):
- def __int__(self):
- return 42
-
- class Foo5(int):
- def __int__(self):
- return 42.
-
- self.assertEqual(int(Foo0()), 42)
- self.assertEqual(int(Foo1()), 42)
- self.assertEqual(int(Foo2()), 42)
- self.assertEqual(int(Foo3()), 0)
- self.assertEqual(int(Foo4()), 42)
- self.assertRaises(TypeError, int, Foo5())
-
- class Classic:
- pass
- for base in (object, Classic):
- class IntOverridesTrunc(base):
- def __int__(self):
- return 42
- def __trunc__(self):
- return -12
- self.assertEqual(int(IntOverridesTrunc()), 42)
-
- class JustTrunc(base):
- def __trunc__(self):
- return 42
- self.assertEqual(int(JustTrunc()), 42)
-
- for trunc_result_base in (object, Classic):
- class Integral(trunc_result_base):
- def __int__(self):
- return 42
-
- class TruncReturnsNonInt(base):
- def __trunc__(self):
- return Integral()
- self.assertEqual(int(TruncReturnsNonInt()), 42)
-
- class NonIntegral(trunc_result_base):
- def __trunc__(self):
- # Check that we avoid infinite recursion.
- return NonIntegral()
-
- class TruncReturnsNonIntegral(base):
- def __trunc__(self):
- return NonIntegral()
- try:
- int(TruncReturnsNonIntegral())
- except TypeError as e:
- # self.assertEqual(str(e),
- # "__trunc__ returned non-Integral"
- # " (type NonIntegral)")
- pass
- else:
- self.fail("Failed to raise TypeError with %s" %
- ((base, trunc_result_base),))
-
- # Regression test for bugs.python.org/issue16060.
- class BadInt(trunc_result_base):
- def __int__(self):
- return 42.0
-
- class TruncReturnsBadInt(base):
- def __trunc__(self):
- return BadInt()
-
- with self.assertRaises(TypeError):
- int(TruncReturnsBadInt())
-
- ####################################################################
- # future-specific tests are below:
- ####################################################################
-
- # Exception messages in Py2 are 8-bit strings. The following fails,
- # even if the testlist strings are wrapped in str() calls...
- @unittest.expectedFailure
- def test_error_message(self):
- testlist = ('\xbd', '123\xbd', ' 123 456 ')
- for s in testlist:
- try:
- int(s)
- except ValueError as e:
- self.assertIn(s.strip(), e.args[0])
- else:
- self.fail("Expected int(%r) to raise a ValueError", s)
-
- def test_bytes_mul(self):
- self.assertEqual(b'\x00' * int(5), b'\x00' * 5)
- self.assertEqual(bytes(b'\x00') * int(5), bytes(b'\x00') * 5)
-
- def test_str_mul(self):
- self.assertEqual(u'\x00' * int(5), u'\x00' * 5)
- self.assertEqual(str(u'\x00') * int(5), str(u'\x00') * 5)
-
- def test_int_bytes(self):
- self.assertEqual(int(b'a\r\n', 16), 10)
- self.assertEqual(int(bytes(b'a\r\n'), 16), 10)
-
- def test_divmod(self):
- """
- Test int.__divmod__
- """
- vals = [10**i for i in range(0, 20)]
- for i in range(200):
- x = random.choice(vals)
- y = random.choice(vals)
- assert divmod(int(x), int(y)) == divmod(x, y)
- assert divmod(int(-x), int(y)) == divmod(-x, y)
- assert divmod(int(x), int(-y)) == divmod(x, -y)
- assert divmod(int(-x), int(-y)) == divmod(-x, -y)
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/future/tests/test_standard_library.py b/future/tests/test_standard_library.py
deleted file mode 100644
index 4b30ee71..00000000
--- a/future/tests/test_standard_library.py
+++ /dev/null
@@ -1,303 +0,0 @@
-"""
-Tests for the future.standard_library module
-"""
-
-from __future__ import absolute_import, unicode_literals, print_function
-from future import standard_library
-from future import utils
-from future.tests.base import unittest
-
-import sys
-import tempfile
-import os
-import copy
-
-from future.standard_library import RENAMES, REPLACED_MODULES
-from future.tests.base import CodeHandler
-
-
-class TestStandardLibraryRenames(CodeHandler):
-
- def setUp(self):
- self.interpreter = 'python'
- self.tempdir = tempfile.mkdtemp() + os.path.sep
-
- @unittest.skipIf(utils.PY3, 'generic import tests are for Py2 only')
- def test_all(self):
- """
- Tests whether all of the old imports in RENAMES are accessible
- under their new names.
- """
- for (oldname, newname) in RENAMES.items():
- if newname == 'winreg' and sys.platform not in ['win32', 'win64']:
- continue
- if newname in REPLACED_MODULES:
- # Skip this check for e.g. the stdlib's ``test`` module,
- # which we have replaced completely.
- continue
- oldmod = __import__(oldname)
- newmod = __import__(newname)
- if '.' not in oldname:
- self.assertEqual(oldmod, newmod)
-
- def test_suspend_hooks(self):
- """
- Code like the try/except block here appears in Pyflakes v0.6.1. This
- method tests whether suspend_hooks() works as advertised.
- """
- example_PY2_check = False
- with standard_library.suspend_hooks():
- # An example of code that we don't want to break:
- try:
- import builtins # fragile check for Python 3.x
- except ImportError:
- example_PY2_check = True
- if utils.PY2:
- self.assertTrue(example_PY2_check)
- else:
- self.assertFalse(example_PY2_check)
- # The import should succeed again now:
- import builtins
-
- def test_remove_hooks(self):
- example_PY2_check = False
-
- standard_library.install_hooks()
- old_meta_path = copy.copy(sys.meta_path)
- import builtins
-
- standard_library.remove_hooks()
- self.assertTrue(len(old_meta_path) == len(sys.meta_path) + 1)
-
- # An example of fragile import code that we don't want to break:
- try:
- import builtins
- except ImportError:
- example_PY2_check = True
- if utils.PY2:
- self.assertTrue(example_PY2_check)
- else:
- self.assertFalse(example_PY2_check)
- standard_library.install_hooks()
- # The import should succeed again now:
- import builtins
- self.assertTrue(len(old_meta_path) == len(sys.meta_path))
-
- def test_remove_hooks2(self):
- """
- This verifies that modules like http.client are no longer accessible after
- disabling import hooks, even if they have been previously imported.
-
- The reason for this test is that Python caches imported modules in sys.modules.
- """
- standard_library.remove_hooks()
- try:
- from . import verify_remove_hooks_affects_imported_modules
- except RuntimeError as e:
- self.fail(e.message)
- finally:
- standard_library.install_hooks()
-
- def test_requests(self):
- """
- GitHub issue #19: conflict with ``requests``
- """
- # The below should succeed while ``requests`` is installed:
- from . import verify_requests_is_not_broken
-
- @unittest.skipIf(utils.PY3, 'not testing for old urllib on Py3')
- def test_old_urllib_import(self):
- """
- Tests whether an imported module can import the old urllib package.
- Importing future.standard_library in a script should be possible and
- not disrupt any uses of the old Py2 standard library names in modules
- imported by that script.
- """
- code1 = '''
- from future import standard_library
- import module_importing_old_urllib
- '''
- self._write_test_script(code1, 'runme.py')
- code2 = '''
- import urllib
- assert 'urlopen' in dir(urllib)
- print('Import succeeded!')
- '''
- self._write_test_script(code2, 'module_importing_old_urllib.py')
- output = self._run_test_script('runme.py')
- print(output)
- self.assertTrue(True)
-
- def test_sys_intern(self):
- """
- Py2's builtin intern() has been moved to the sys module. Tests
- whether sys.intern is available.
- """
- from sys import intern
- if utils.PY3:
- self.assertEqual(intern('hello'), 'hello')
- else:
- # intern() requires byte-strings on Py2:
- self.assertEqual(intern(b'hello'), b'hello')
-
- def test_sys_maxsize(self):
- """
- Tests whether sys.maxsize is available.
- """
- from sys import maxsize
- print(maxsize)
- self.assertTrue(maxsize > 0)
-
- def test_itertools_filterfalse(self):
- """
- Tests whether itertools.filterfalse is available.
- """
- from itertools import filterfalse
- not_div_by_3 = filterfalse(lambda x: x % 3 == 0, range(8))
- self.assertEqual(list(not_div_by_3), [1, 2, 4, 5, 7])
-
- def test_itertools_zip_longest(self):
- """
- Tests whether itertools.zip_longest is available.
- """
- from itertools import zip_longest
- a = (1, 2)
- b = [2, 4, 6]
- self.assertEqual(list(zip_longest(a, b)),
- [(1, 2), (2, 4), (None, 6)])
-
- # def test_import_from_module(self):
- # """
- # Tests whether e.g. "import socketserver" succeeds in a module
- # imported by another module. We do not want it to!
- # """
- # code1 = '''
- # from future import standard_library
- # import importme2
- # '''
- # code2 = '''
- # import socketserver
- # print('Import succeeded!')
- # '''
- # self._write_test_script(code1, 'importme1.py')
- # self._write_test_script(code2, 'importme2.py')
- # output = self._run_test_script('importme1.py')
- # print(output)
-
- def test_configparser(self):
- import configparser
-
- def test_copyreg(self):
- import copyreg
-
- def test_pickle(self):
- import pickle
-
- def test_profile(self):
- import profile
-
- def test_stringio(self):
- from io import StringIO
- s = StringIO('test')
- for method in ['tell', 'read', 'seek', 'close', 'flush']:
- self.assertTrue(hasattr(s, method))
-
- def test_bytesio(self):
- from io import BytesIO
- s = BytesIO(b'test')
- for method in ['tell', 'read', 'seek', 'close', 'flush', 'getvalue']:
- self.assertTrue(hasattr(s, method))
-
- def test_queue(self):
- import queue
- q = queue.Queue()
- q.put('thing')
- self.assertFalse(q.empty())
-
- def test_reprlib(self):
- import reprlib
-
- def test_socketserver(self):
- import socketserver
-
- @unittest.skip("Not testing tkinter import (it may be installed separately from Python)")
- def test_tkinter(self):
- import tkinter
-
- def test_builtins(self):
- import builtins
- self.assertTrue(hasattr(builtins, 'tuple'))
-
- @unittest.skip("skipping in case there's no net connection")
- def test_urllib_request(self):
- import urllib.request
- from pprint import pprint
- URL = 'http://pypi.python.org/pypi/{0}/json'
- package = 'future'
- r = urllib.request.urlopen(URL.format(package))
- # pprint(r.read().decode('utf-8'))
-
- def test_html_import(self):
- import html
- import html.entities
- import html.parser
-
- def test_http_client_import(self):
- import http.client
- self.assertTrue(True)
-
- @unittest.expectedFailure
- def test_http_imports(self):
- import http
- import http.server
- import http.cookies
- import http.cookiejar
-
- @unittest.expectedFailure
- def test_urllib_imports(self):
- import urllib
- import urllib.parse
- import urllib.request
- import urllib.robotparser
- import urllib.error
- import urllib.response
- self.assertTrue(True)
-
- @unittest.expectedFailure
- def test_urllib_parse(self):
- import urllib.parse
- URL = 'http://pypi.python.org/test_url/spaces oh no/'
- self.assertEqual(urllib.parse.quote(URL.format(package)), 'http%3A//pypi.python.org/test_url/spaces%20oh%20no/')
-
- def test_underscore_prefixed_modules(self):
- import _thread
- import _dummy_thread
- import _markupbase
- self.assertTrue(True)
-
- def test_reduce(self):
- """
- reduce has been moved to the functools module
- """
- import functools
- self.assertEqual(functools.reduce(lambda x, y: x+y, range(1, 6)), 15)
-
- def test_collections_userstuff(self):
- """
- UserDict, UserList, and UserString have been moved to the
- collections module.
- """
- from collections import UserDict
- from collections import UserList
- from collections import UserString
-
- def test_reload(self):
- """
- reload has been moved to the imp module
- """
- import imp
- imp.reload(imp)
- self.assertTrue(True)
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/future/tests/test_super.py b/future/tests/test_super.py
deleted file mode 100644
index d8959f3a..00000000
--- a/future/tests/test_super.py
+++ /dev/null
@@ -1,10 +0,0 @@
-'''
-Tests for the new super() function syntax
-'''
-from __future__ import absolute_import, print_function
-from future.builtins.backports import super
-from future.tests.base import unittest
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/future/tests/test_utils.py b/future/tests/test_utils.py
deleted file mode 100644
index 37fceb3c..00000000
--- a/future/tests/test_utils.py
+++ /dev/null
@@ -1,138 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Tests for the various utility functions and classes in ``future.utils``
-"""
-
-from __future__ import absolute_import, unicode_literals, print_function
-import sys
-from future.builtins import *
-from future.utils import (old_div, istext, isbytes, native, PY2, PY3,
- native_str, raise_)
-
-
-from numbers import Integral
-from future.tests.base import unittest
-
-TEST_UNICODE_STR = u'ℝεα∂@ßʟ℮ ☂ℯṧт υηḯ¢☺ḓ℮'
-
-
-class TestUtils(unittest.TestCase):
- def setUp(self):
- self.s = TEST_UNICODE_STR
- self.s2 = str(self.s)
- self.b = b'ABCDEFG'
- self.b2 = bytes(self.b)
-
- def test_old_div(self):
- """
- Tests whether old_div(a, b) is always equal to Python 2's a / b.
- """
- self.assertEqual(old_div(1, 2), 0)
- self.assertEqual(old_div(2, 2), 1)
- self.assertTrue(isinstance(old_div(2, 2), int))
-
- self.assertEqual(old_div(3, 2), 1)
- self.assertTrue(isinstance(old_div(3, 2), int))
-
- self.assertEqual(old_div(3., 2), 1.5)
- self.assertTrue(not isinstance(old_div(3., 2), int))
-
- self.assertEqual(old_div(-1, 2.), -0.5)
- self.assertTrue(not isinstance(old_div(-1, 2.), int))
-
- with self.assertRaises(ZeroDivisionError):
- old_div(0, 0)
- with self.assertRaises(ZeroDivisionError):
- old_div(1, 0)
-
- def test_native_str(self):
- """
- Tests whether native_str is really equal to the platform str.
- """
- if PY2:
- import __builtin__
- builtin_str = __builtin__.str
- else:
- import builtins
- builtin_str = builtins.str
-
- inputs = [b'blah', u'blah', 'blah']
- for s in inputs:
- self.assertEqual(native_str(s), builtin_str(s))
- self.assertTrue(isinstance(native_str(s), builtin_str))
-
- def test_native(self):
- a = int(10**20) # long int
- b = native(a)
- self.assertEqual(a, b)
- if PY2:
- self.assertEqual(type(b), long)
- else:
- self.assertEqual(type(b), int)
-
- c = bytes(b'ABC')
- d = native(c)
- self.assertEqual(c, d)
- if PY2:
- self.assertEqual(type(d), type(b'Py2 byte-string'))
- else:
- self.assertEqual(type(d), bytes)
-
- s = str(u'ABC')
- t = native(s)
- self.assertEqual(s, t)
- if PY2:
- self.assertEqual(type(t), unicode)
- else:
- self.assertEqual(type(t), str)
- type(s)
-
- def test_istext(self):
- self.assertTrue(istext(self.s))
- self.assertTrue(istext(self.s2))
- self.assertFalse(istext(self.b))
- self.assertFalse(istext(self.b2))
-
- def test_isbytes(self):
- self.assertTrue(isbytes(self.b))
- self.assertTrue(isbytes(self.b2))
- self.assertFalse(isbytes(self.s))
- self.assertFalse(isbytes(self.s2))
-
- def test_raise_(self):
- def valerror():
- try:
- raise ValueError("Apples!")
- except Exception as e:
- raise_(e)
-
- self.assertRaises(ValueError, valerror)
-
- def with_value():
- raise_(IOError, "This is an error")
-
- self.assertRaises(IOError, with_value)
-
- try:
- with_value()
- except IOError as e:
- self.assertEqual(str(e), "This is an error")
-
- def with_traceback():
- try:
- raise ValueError("An error")
- except Exception as e:
- _, _, traceback = sys.exc_info()
- raise_(IOError, str(e), traceback)
-
- self.assertRaises(IOError, with_traceback)
-
- try:
- with_traceback()
- except IOError as e:
- self.assertEqual(str(e), "An error")
-
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/future/utils/__init__.py b/future/utils/__init__.py
deleted file mode 100644
index 1e7e0b68..00000000
--- a/future/utils/__init__.py
+++ /dev/null
@@ -1,534 +0,0 @@
-"""
-A selection of cross-compatible functions for Python 2 and 3.
-
-These come from several sources:
-* Jinja2 (BSD licensed: see https://github.com/mitsuhiko/jinja2/blob/master/LICENSE
-* Pandas compatibility module pandas.compat
-* six.py by Benjamin Peterson
-* Django
-
-This exports useful functions for 2/3 compatible code that are not
-builtins on Python 3:
-* bind_method: binds functions to classes
-* ``native_str_to_bytes`` and ``bytes_to_native_str``
-* ``native_str``: always equal to the native platform string object (because
- this may be shadowed by imports from future.builtins)
-* lists: lrange(), lmap(), lzip(), lfilter()
-* iterable method compatibility: iteritems, iterkeys, itervalues
- * Uses the original method if available, otherwise uses items, keys, values.
-* types:
- * text_type: unicode in Python 2, str in Python 3
- * binary_type: str in Python 2, bythes in Python 3
- * string_types: basestring in Python 2, str in Python 3
-
-* bchr(c):
- Take an integer and make a 1-character byte string
-* bord(c)
- Take the result of indexing on a byte string and make an integer
-* tobytes(s)
- Take a text string, a byte string, or a sequence of characters taken
- from a byte string, and make a byte string.
-
-This module also defines a simple decorator called
-``python_2_unicode_compatible`` (from django.utils.encoding) which
-defines ``__unicode__`` and ``__str__`` methods consistently under Python
-3 and 2. To support Python 3 and 2 with a single code base, simply define
-a ``__str__`` method returning unicode text and apply the
-python_2_unicode_compatible decorator to the class like this::
-
- >>> from future.utils import python_2_unicode_compatible
-
- >>> @python_2_unicode_compatible
- ... class MyClass(object):
- ... def __str__(self):
- ... return u'Unicode string: \u5b54\u5b50'
-
- >>> a = MyClass()
-
-Then, after this import:
-
- >>> from future.builtins import str
-
-the following is ``True`` on both Python 3 and 2::
-
- >>> str(a) == a.encode('utf-8').decode('utf-8')
- True
-
-and, on a Unicode-enabled terminal with the right fonts, these both print the
-Chinese characters for Confucius::
-
- print(a)
- print(str(a))
-
-On Python 3, this decorator is a no-op.
-
-"""
-
-import types
-import sys
-import numbers
-
-PY3 = sys.version_info[0] == 3
-PY2 = sys.version_info[0] == 2
-PYPY = hasattr(sys, 'pypy_translation_info')
-
-
-def python_2_unicode_compatible(cls):
- """
- A decorator that defines __unicode__ and __str__ methods under Python
- 2. Under Python 3 it does nothing.
-
- To support Python 2 and 3 with a single code base, define a __str__
- method returning unicode text and apply this decorator to the class.
-
- The implementation comes from django.utils.encoding.
- """
- if not PY3:
- cls.__unicode__ = cls.__str__
- cls.__str__ = lambda self: self.__unicode__().encode('utf-8')
- return cls
-
-
-def with_metaclass(meta, *bases):
- """
- Function from jinja2/_compat.py. License: BSD.
-
- Use it like this::
-
- class BaseForm(object):
- pass
-
- class FormType(type):
- pass
-
- class Form(with_metaclass(FormType, BaseForm)):
- pass
-
- This requires a bit of explanation: the basic idea is to make a
- dummy metaclass for one level of class instantiation that replaces
- itself with the actual metaclass. Because of internal type checks
- we also need to make sure that we downgrade the custom metaclass
- for one level to something closer to type (that's why __call__ and
- __init__ comes back from type etc.).
-
- This has the advantage over six.with_metaclass of not introducing
- dummy classes into the final MRO.
- """
- class metaclass(meta):
- __call__ = type.__call__
- __init__ = type.__init__
- def __new__(cls, name, this_bases, d):
- if this_bases is None:
- return type.__new__(cls, name, (), d)
- return meta(name, bases, d)
- return metaclass('temporary_class', None, {})
-
-
-# Definitions from pandas.compat follow:
-if PY3:
- def bchr(s):
- return bytes([s])
- def bstr(s):
- if isinstance(s, str):
- return bytes(s, 'latin-1')
- else:
- return bytes(s)
- def bord(s):
- return s
-else:
- # Python 2
- def bchr(s):
- return chr(s)
- def bstr(s):
- return str(s)
- def bord(s):
- return ord(s)
-
-###
-
-if PY3:
- def tobytes(s):
- if isinstance(s, bytes):
- return s
- else:
- if isinstance(s, str):
- return s.encode('latin-1')
- else:
- return bytes(s)
-else:
- # Python 2
- def tobytes(s):
- '''
- Encodes to latin-1 (where the first 256 chars are the same as
- ASCII.)
- '''
- if isinstance(s, unicode):
- return s.encode('latin-1')
- else:
- return ''.join(s)
-
-if PY3:
- def native_str_to_bytes(s, encoding='ascii'):
- return s.encode(encoding)
-
- def bytes_to_native_str(b, encoding='ascii'):
- return b.decode(encoding)
-else:
- # Python 2
- def native_str_to_bytes(s, encoding='ascii'):
- return s
-
- def bytes_to_native_str(b, encoding='ascii'):
- return b
-
-
-if PY3:
- # list-producing versions of the major Python iterating functions
- def lrange(*args, **kwargs):
- return list(range(*args, **kwargs))
-
- def lzip(*args, **kwargs):
- return list(zip(*args, **kwargs))
-
- def lmap(*args, **kwargs):
- return list(map(*args, **kwargs))
-
- def lfilter(*args, **kwargs):
- return list(filter(*args, **kwargs))
-else:
- import __builtin__
- # Python 2-builtin ranges produce lists
- lrange = __builtin__.range
- lzip = __builtin__.zip
- lmap = __builtin__.map
- lfilter = __builtin__.filter
-
-
-def isidentifier(s, dotted=False):
- '''
- A function equivalent to the str.isidentifier method on Py3
- '''
- if dotted:
- return all(isidentifier(a) for a in s.split('.'))
- if PY3:
- return s.isidentifier()
- else:
- import re
- _name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*$")
- return bool(_name_re.match(s))
-
-
-def viewitems(obj, **kwargs):
- """
- Function for iterating over dictionary items with the same set-like
- behaviour on Py2.7 as on Py3.
-
- Passes kwargs to method."""
- func = getattr(obj, "viewitems", None)
- if not func:
- func = obj.items
- return func(**kwargs)
-
-
-def viewkeys(obj, **kwargs):
- """
- Function for iterating over dictionary keys with the same set-like
- behaviour on Py2.7 as on Py3.
-
- Passes kwargs to method."""
- func = getattr(obj, "viewkeys", None)
- if not func:
- func = obj.keys
- return func(**kwargs)
-
-
-def viewvalues(obj, **kwargs):
- """
- Function for iterating over dictionary values with the same set-like
- behaviour on Py2.7 as on Py3.
-
- Passes kwargs to method."""
- func = getattr(obj, "viewvalues", None)
- if not func:
- func = obj.values
- return func(**kwargs)
-
-
-def iteritems(obj, **kwargs):
- """Use this only if compatibility with Python versions before 2.7 is
- required. Otherwise, prefer viewitems().
- """
- func = getattr(obj, "iteritems", None)
- if not func:
- func = obj.items
- return func(**kwargs)
-
-
-def iterkeys(obj, **kwargs):
- """Use this only if compatibility with Python versions before 2.7 is
- required. Otherwise, prefer viewkeys().
- """
- func = getattr(obj, "iterkeys", None)
- if not func:
- func = obj.keys
- return func(**kwargs)
-
-
-def itervalues(obj, **kwargs):
- """Use this only if compatibility with Python versions before 2.7 is
- required. Otherwise, prefer viewvalues().
- """
- func = getattr(obj, "itervalues", None)
- if not func:
- func = obj.values
- return func(**kwargs)
-
-
-def bind_method(cls, name, func):
- """Bind a method to class, python 2 and python 3 compatible.
-
- Parameters
- ----------
-
- cls : type
- class to receive bound method
- name : basestring
- name of method on class instance
- func : function
- function to be bound as method
-
- Returns
- -------
- None
- """
- # only python 2 has bound/unbound method issue
- if not PY3:
- setattr(cls, name, types.MethodType(func, None, cls))
- else:
- setattr(cls, name, func)
-
-
-def getexception():
- return sys.exc_info()[1]
-
-
-if PY3:
- def raise_(tp, value=None, tb=None):
- """
- A function that matches the Python 2.x ``raise`` statement. This
- allows re-raising exceptions with the cls value and traceback on
- Python 2 and 3.
- """
- if value is not None and isinstance(tp, Exception):
- raise TypeError("instance exception may not have a separate value")
- if value is not None:
- exc = tp(value)
- else:
- exc = tp
- if exc.__traceback__ is not tb:
- raise exc.with_traceback(tb)
-
- def raise_with_traceback(exc, traceback=Ellipsis):
- if traceback == Ellipsis:
- _, _, traceback = sys.exc_info()
- raise exc.with_traceback(traceback)
-
-else:
- exec('''
-def raise_(tp, value=None, tb=None):
- raise tp, value, tb
-
-def raise_with_traceback(exc, traceback=Ellipsis):
- if traceback == Ellipsis:
- _, _, traceback = sys.exc_info()
- raise exc, None, traceback
-'''.strip())
-
-
-raise_with_traceback.__doc__ = (
-"""Raise exception with existing traceback.
-If traceback is not passed, uses sys.exc_info() to get traceback."""
-)
-
-
-# Deprecated alias for backward compatibility with ``future`` versions < 0.11:
-reraise = raise_
-
-
-def implements_iterator(cls):
- '''
- From jinja2/_compat.py. License: BSD.
-
- Use as a decorator like this::
-
- @implements_iterator
- class UppercasingIterator(object):
- def __init__(self, iterable):
- self._iter = iter(iterable)
- def __iter__(self):
- return self
- def __next__(self):
- return next(self._iter).upper()
-
- '''
- if PY3:
- return cls
- else:
- cls.next = cls.__next__
- del cls.__next__
- return cls
-
-if PY3:
- get_next = lambda x: x.next
-else:
- get_next = lambda x: x.__next__
-
-
-def encode_filename(filename):
- if PY3:
- return filename
- else:
- if isinstance(filename, unicode):
- return filename.encode('utf-8')
- return filename
-
-
-def is_new_style(cls):
- """
- Python 2.7 has both new-style and old-style classes. Old-style classes can
- be pesky in some circumstances, such as when using inheritance. Use this
- function to test for whether a class is new-style. (Python 3 only has
- new-style classes.)
- """
- return hasattr(cls, '__class__') and ('__dict__' in dir(cls)
- or hasattr(cls, '__slots__'))
-
-# The native platform string and bytes types. Useful because ``str`` and
-# ``bytes`` are redefined on Py2 by ``from future.builtins import *``.
-native_str = str
-native_bytes = bytes
-
-
-def istext(obj):
- """
- Deprecated. Use::
- >>> isinstance(obj, str)
- after this import:
- >>> from future.builtins import str
- """
- return isinstance(obj, type(u''))
-
-
-def isbytes(obj):
- """
- Deprecated. Use::
- >>> isinstance(obj, bytes)
- after this import:
- >>> from future.builtins import bytes
- """
- return isinstance(obj, type(b''))
-
-
-def isnewbytes(obj):
- """
- Equivalent to the result of ``isinstance(obj, newbytes)`` were
- ``__instancecheck__`` not overridden on the newbytes subclass. In
- other words, it is REALLY a newbytes instance, not a Py2 native str
- object?
- """
- # TODO: generalize this so that it works with subclasses of newbytes
- # Import is here to avoid circular imports:
- from future.builtins.backports.newbytes import newbytes
- return type(obj) == newbytes
-
-
-def isint(obj):
- """
- Deprecated. Tests whether an object is a Py3 ``int`` or either a Py2 ``int`` or
- ``long``.
-
- Instead of using this function, you can use:
-
- >>> from future.builtins import int
- >>> isinstance(obj, int)
-
- The following idiom is equivalent:
-
- >>> from numbers import Integral
- >>> isinstance(obj, Integral)
- """
-
- return isinstance(obj, numbers.Integral)
-
-
-def native(obj):
- """
- On Py3, this is a no-op: native(obj) -> obj
-
- On Py2, returns the corresponding native Py2 types that are
- superclasses for backported objects from Py3:
-
- >>> from future.builtins import str, bytes, int
-
- >>> native(str(u'ABC'))
- u'ABC'
- >>> type(native(str(u'ABC')))
- unicode
-
- >>> native(bytes(b'ABC'))
- b'ABC'
- >>> type(native(bytes(b'ABC')))
- bytes
-
- >>> native(int(10**20))
- 100000000000000000000L
- >>> type(native(int(10**20)))
- long
-
- Existing native types on Py2 will be returned unchanged:
-
- >>> type(native(u'ABC'))
- unicode
- """
- if hasattr(obj, '__native__'):
- return obj.__native__()
- else:
- return obj
-
-
-# Implementation of exec_ is from ``six``:
-if PY3:
- import builtins
- exec_ = getattr(builtins, "exec")
-else:
- def exec_(code, globs=None, locs=None):
- """Execute code in a namespace."""
- if globs is None:
- frame = sys._getframe(1)
- globs = frame.f_globals
- if locs is None:
- locs = frame.f_locals
- del frame
- elif locs is None:
- locs = globs
- exec("""exec code in globs, locs""")
-
-
-def old_div(a, b):
- """
- Equivalent to ``a / b`` on Python 2 without ``from __future__ import
- division``.
- """
- return a // b if (isint(a) and isint(b)) else a / b
-
-
-__all__ = ['PY3', 'PY2', 'PYPY', 'python_2_unicode_compatible',
- 'with_metaclass', 'bchr', 'bstr', 'bord',
- 'tobytes', 'str_to_native_bytes', 'bytes_to_native_str',
- 'lrange', 'lmap', 'lzip', 'lfilter',
- 'isidentifier', 'iteritems', 'iterkeys', 'itervalues',
- 'viewitems', 'viewkeys', 'viewvalues',
- 'bind_method', 'getexception',
- 'reraise', 'implements_iterator', 'get_next', 'encode_filename',
- 'is_new_style', 'native_str']
-
diff --git a/future/utils/encoding.py b/future/utils/encoding.py
deleted file mode 100644
index 3dee86e0..00000000
--- a/future/utils/encoding.py
+++ /dev/null
@@ -1,211 +0,0 @@
-"""
-Various small encoding utils from django.utils.encoding. This has a
-BSD-based license; see
- https://github.com/django/django/blob/master/LICENSE
-"""
-from __future__ import unicode_literals
-
-import codecs
-import datetime
-from decimal import Decimal
-import locale
-try:
- from urllib.parse import quote
-except ImportError: # Python 2
- from urllib import quote
-
-from future.utils import six
-
-def python_2_unicode_compatible(klass):
- """
- A decorator that defines __unicode__ and __str__ methods under Python 2.
- Under Python 3 it does nothing.
-
- To support Python 2 and 3 with a single code base, define a __str__ method
- returning text and apply this decorator to the class.
- """
- if not six.PY3:
- klass.__unicode__ = klass.__str__
- klass.__str__ = lambda self: self.__unicode__().encode('utf-8')
- return klass
-
-def smart_text(s, encoding='utf-8', strings_only=False, errors='strict'):
- """
- Returns a text object representing 's' -- unicode on Python 2 and str on
- Python 3. Treats bytestrings using the 'encoding' codec.
-
- If strings_only is True, don't convert (some) non-string-like objects.
- """
- return force_text(s, encoding, strings_only, errors)
-
-def is_protected_type(obj):
- """Determine if the object instance is of a protected type.
-
- Objects of protected types are preserved as-is when passed to
- force_text(strings_only=True).
- """
- return isinstance(obj, six.integer_types + (type(None), float, Decimal,
- datetime.datetime, datetime.date, datetime.time))
-
-def force_text(s, encoding='utf-8', strings_only=False, errors='strict'):
- """
- Similar to smart_text, except that lazy instances are resolved to
- strings, rather than kept as lazy objects.
-
- If strings_only is True, don't convert (some) non-string-like objects.
- """
- # Handle the common case first, saves 30-40% when s is an instance of
- # six.text_type. This function gets called often in that setting.
- if isinstance(s, six.text_type):
- return s
- if strings_only and is_protected_type(s):
- return s
- try:
- if not isinstance(s, six.string_types):
- if hasattr(s, '__unicode__'):
- s = s.__unicode__()
- else:
- if six.PY3:
- if isinstance(s, bytes):
- s = six.text_type(s, encoding, errors)
- else:
- s = six.text_type(s)
- else:
- s = six.text_type(bytes(s), encoding, errors)
- else:
- # Note: We use .decode() here, instead of six.text_type(s, encoding,
- # errors), so that if s is a SafeBytes, it ends up being a
- # SafeText at the end.
- s = s.decode(encoding, errors)
- except UnicodeDecodeError as e:
- # If we get to here, the caller has passed in an Exception
- # subclass populated with non-ASCII bytestring data without a
- # working unicode method. Try to handle this without raising a
- # further exception by individually forcing the exception args
- # to unicode.
- s = ' '.join([force_text(arg, encoding, strings_only,
- errors) for arg in s])
- return s
-
-def smart_bytes(s, encoding='utf-8', strings_only=False, errors='strict'):
- """
- Returns a bytestring version of 's', encoded as specified in 'encoding'.
-
- If strings_only is True, don't convert (some) non-string-like objects.
- """
- return force_bytes(s, encoding, strings_only, errors)
-
-
-def force_bytes(s, encoding='utf-8', strings_only=False, errors='strict'):
- """
- Similar to smart_bytes, except that lazy instances are resolved to
- strings, rather than kept as lazy objects.
-
- If strings_only is True, don't convert (some) non-string-like objects.
- """
- if isinstance(s, six.memoryview):
- s = bytes(s)
- if isinstance(s, bytes):
- if encoding == 'utf-8':
- return s
- else:
- return s.decode('utf-8', errors).encode(encoding, errors)
- if strings_only and (s is None or isinstance(s, int)):
- return s
- if not isinstance(s, six.string_types):
- try:
- if six.PY3:
- return six.text_type(s).encode(encoding)
- else:
- return bytes(s)
- except UnicodeEncodeError:
- if isinstance(s, Exception):
- # An Exception subclass containing non-ASCII data that doesn't
- # know how to print itself properly. We shouldn't raise a
- # further exception.
- return b' '.join([force_bytes(arg, encoding, strings_only,
- errors) for arg in s])
- return six.text_type(s).encode(encoding, errors)
- else:
- return s.encode(encoding, errors)
-
-if six.PY3:
- smart_str = smart_text
- force_str = force_text
-else:
- smart_str = smart_bytes
- force_str = force_bytes
- # backwards compatibility for Python 2
- smart_unicode = smart_text
- force_unicode = force_text
-
-smart_str.__doc__ = """\
-Apply smart_text in Python 3 and smart_bytes in Python 2.
-
-This is suitable for writing to sys.stdout (for instance).
-"""
-
-force_str.__doc__ = """\
-Apply force_text in Python 3 and force_bytes in Python 2.
-"""
-
-def iri_to_uri(iri):
- """
- Convert an Internationalized Resource Identifier (IRI) portion to a URI
- portion that is suitable for inclusion in a URL.
-
- This is the algorithm from section 3.1 of RFC 3987. However, since we are
- assuming input is either UTF-8 or unicode already, we can simplify things a
- little from the full method.
-
- Returns an ASCII string containing the encoded result.
- """
- # The list of safe characters here is constructed from the "reserved" and
- # "unreserved" characters specified in sections 2.2 and 2.3 of RFC 3986:
- # reserved = gen-delims / sub-delims
- # gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
- # sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
- # / "*" / "+" / "," / ";" / "="
- # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
- # Of the unreserved characters, urllib.quote already considers all but
- # the ~ safe.
- # The % character is also added to the list of safe characters here, as the
- # end of section 3.1 of RFC 3987 specifically mentions that % must not be
- # converted.
- if iri is None:
- return iri
- return quote(force_bytes(iri), safe=b"/#%[]=:;$&()+,!?*@'~")
-
-def filepath_to_uri(path):
- """Convert a file system path to a URI portion that is suitable for
- inclusion in a URL.
-
- We are assuming input is either UTF-8 or unicode already.
-
- This method will encode certain chars that would normally be recognized as
- special chars for URIs. Note that this method does not encode the '
- character, as it is a valid character within URIs. See
- encodeURIComponent() JavaScript function for more details.
-
- Returns an ASCII string containing the encoded result.
- """
- if path is None:
- return path
- # I know about `os.sep` and `os.altsep` but I want to leave
- # some flexibility for hardcoding separators.
- return quote(force_bytes(path).replace(b"\\", b"/"), safe=b"/~!*()'")
-
-def get_system_encoding():
- """
- The encoding of the default system locale but falls back to the given
- fallback encoding if the encoding is unsupported by python or could
- not be determined. See tickets #10335 and #5846
- """
- try:
- encoding = locale.getdefaultlocale()[1] or 'ascii'
- codecs.lookup(encoding)
- except Exception:
- encoding = 'ascii'
- return encoding
-
-DEFAULT_LOCALE_ENCODING = get_system_encoding()
diff --git a/future/utils/frompy2.py b/future/utils/frompy2.py
deleted file mode 100644
index c76007fe..00000000
--- a/future/utils/frompy2.py
+++ /dev/null
@@ -1,70 +0,0 @@
-"""
-A resurrection of some old functions from Python 2. These should be used
-sparingly, to help with porting efforts, since code using them is no
-longer standard Python 3 code.
-
-We provide these builtin functions which have no equivalent on Py3:
-
-- cmp()
-- execfile()
-
-These aliases are also provided:
-
-- raw_input() <- input()
-- unicode() <- str()
-- unichr() <- chr()
-
-For reference, the following Py2 builtin functions are available from
-these standard locations on both Py2.6+ and Py3:
-
-- reduce() <- functools.reduce()
-- reload() <- imp.reload()
-
-"""
-
-from __future__ import unicode_literals
-
-from future.utils import PY3
-
-
-if PY3:
- # Bring back the cmp function
- cmp = lambda a, b: (a > b) - (a < b)
- raw_input = input
- unicode = str
- unichr = chr
-else:
- cmp = __builtin__.cmp
- raw_input = __builtin__.raw_input
- unicode = __builtin__.unicode
- unichr = __builtin__.unichr
-
-
-def execfile(filename, myglobals=None, mylocals=None):
- if PY3:
- mylocals = mylocals if (mylocals is not None) else myglobals
- exec_(compile(open(filename).read(), filename, 'exec'),
- myglobals, mylocals)
- else:
- if sys.platform == 'win32':
- # The rstrip() is necessary b/c trailing whitespace in
- # files will cause an IndentationError in Python 2.6
- # (this was fixed in 2.7). See IPython issue 1027.
- scripttext = __builtin__.open(filename).read().rstrip() + '\n'
- # compile converts unicode filename to str assuming
- # ascii. Let's do the conversion before calling compile
- if isinstance(filename, unicode):
- filename = filename.encode(unicode, 'replace')
- # else:
- # filename = filename
- exec_(compile(scripttext, filename, 'exec') in glob, loc)
- else:
- if isinstance(filename, unicode):
- filename = filename.encode(sys.getfilesystemencoding())
- else:
- filename = filename
- __builtin__.execfile(filename, myglobals=myglobals,
- mylocals=mylocals)
-
-
-__all__ = ['cmp', 'raw_input', 'unichr', 'unicode', 'execfile']
diff --git a/future/utils/six.py b/future/utils/six.py
deleted file mode 100644
index d972faf3..00000000
--- a/future/utils/six.py
+++ /dev/null
@@ -1,582 +0,0 @@
-"""Utilities for writing code that runs on Python 2 and 3"""
-
-# Copyright (c) 2010-2013 Benjamin Peterson
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import operator
-import sys
-import types
-
-__author__ = "Benjamin Peterson "
-__version__ = "1.4.1"
-
-
-# Useful for very coarse version differentiation.
-PY2 = sys.version_info[0] == 2
-PY3 = sys.version_info[0] == 3
-
-if PY3:
- string_types = str,
- integer_types = int,
- class_types = type,
- text_type = str
- binary_type = bytes
-
- MAXSIZE = sys.maxsize
-else:
- string_types = basestring,
- integer_types = (int, long)
- class_types = (type, types.ClassType)
- text_type = unicode
- binary_type = str
-
- if sys.platform.startswith("java"):
- # Jython always uses 32 bits.
- MAXSIZE = int((1 << 31) - 1)
- else:
- # It's possible to have sizeof(long) != sizeof(Py_ssize_t).
- class X(object):
- def __len__(self):
- return 1 << 31
- try:
- len(X())
- except OverflowError:
- # 32-bit
- MAXSIZE = int((1 << 31) - 1)
- else:
- # 64-bit
- MAXSIZE = int((1 << 63) - 1)
- del X
-
-
-def _add_doc(func, doc):
- """Add documentation to a function."""
- func.__doc__ = doc
-
-
-def _import_module(name):
- """Import module, returning the module after the last dot."""
- __import__(name)
- return sys.modules[name]
-
-
-class _LazyDescr(object):
-
- def __init__(self, name):
- self.name = name
-
- def __get__(self, obj, tp):
- result = self._resolve()
- setattr(obj, self.name, result)
- # This is a bit ugly, but it avoids running this again.
- delattr(tp, self.name)
- return result
-
-
-class MovedModule(_LazyDescr):
-
- def __init__(self, name, old, new=None):
- super(MovedModule, self).__init__(name)
- if PY3:
- if new is None:
- new = name
- self.mod = new
- else:
- self.mod = old
-
- def _resolve(self):
- return _import_module(self.mod)
-
-
-class MovedAttribute(_LazyDescr):
-
- def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None):
- super(MovedAttribute, self).__init__(name)
- if PY3:
- if new_mod is None:
- new_mod = name
- self.mod = new_mod
- if new_attr is None:
- if old_attr is None:
- new_attr = name
- else:
- new_attr = old_attr
- self.attr = new_attr
- else:
- self.mod = old_mod
- if old_attr is None:
- old_attr = name
- self.attr = old_attr
-
- def _resolve(self):
- module = _import_module(self.mod)
- return getattr(module, self.attr)
-
-
-
-class _MovedItems(types.ModuleType):
- """Lazy loading of moved objects"""
-
-
-_moved_attributes = [
- MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"),
- MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"),
- MovedAttribute("filterfalse", "itertools", "itertools", "ifilterfalse", "filterfalse"),
- MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"),
- MovedAttribute("map", "itertools", "builtins", "imap", "map"),
- MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"),
- MovedAttribute("reload_module", "__builtin__", "imp", "reload"),
- MovedAttribute("reduce", "__builtin__", "functools"),
- MovedAttribute("StringIO", "StringIO", "io"),
- MovedAttribute("UserString", "UserString", "collections"),
- MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"),
- MovedAttribute("zip", "itertools", "builtins", "izip", "zip"),
- MovedAttribute("zip_longest", "itertools", "itertools", "izip_longest", "zip_longest"),
-
- MovedModule("builtins", "__builtin__"),
- MovedModule("configparser", "ConfigParser"),
- MovedModule("copyreg", "copy_reg"),
- MovedModule("http_cookiejar", "cookielib", "http.cookiejar"),
- MovedModule("http_cookies", "Cookie", "http.cookies"),
- MovedModule("html_entities", "htmlentitydefs", "html.entities"),
- MovedModule("html_parser", "HTMLParser", "html.parser"),
- MovedModule("http_client", "httplib", "http.client"),
- MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"),
- MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"),
- MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"),
- MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"),
- MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"),
- MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"),
- MovedModule("cPickle", "cPickle", "pickle"),
- MovedModule("queue", "Queue"),
- MovedModule("reprlib", "repr"),
- MovedModule("socketserver", "SocketServer"),
- MovedModule("tkinter", "Tkinter"),
- MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"),
- MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"),
- MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"),
- MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"),
- MovedModule("tkinter_tix", "Tix", "tkinter.tix"),
- MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"),
- MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"),
- MovedModule("tkinter_colorchooser", "tkColorChooser",
- "tkinter.colorchooser"),
- MovedModule("tkinter_commondialog", "tkCommonDialog",
- "tkinter.commondialog"),
- MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"),
- MovedModule("tkinter_font", "tkFont", "tkinter.font"),
- MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"),
- MovedModule("tkinter_tksimpledialog", "tkSimpleDialog",
- "tkinter.simpledialog"),
- MovedModule("urllib_parse", __name__ + ".moves.urllib_parse", "urllib.parse"),
- MovedModule("urllib_error", __name__ + ".moves.urllib_error", "urllib.error"),
- MovedModule("urllib", __name__ + ".moves.urllib", __name__ + ".moves.urllib"),
- MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"),
- MovedModule("winreg", "_winreg"),
-]
-for attr in _moved_attributes:
- setattr(_MovedItems, attr.name, attr)
-del attr
-
-moves = sys.modules[__name__ + ".moves"] = _MovedItems(__name__ + ".moves")
-
-
-
-class Module_six_moves_urllib_parse(types.ModuleType):
- """Lazy loading of moved objects in six.moves.urllib_parse"""
-
-
-_urllib_parse_moved_attributes = [
- MovedAttribute("ParseResult", "urlparse", "urllib.parse"),
- MovedAttribute("parse_qs", "urlparse", "urllib.parse"),
- MovedAttribute("parse_qsl", "urlparse", "urllib.parse"),
- MovedAttribute("urldefrag", "urlparse", "urllib.parse"),
- MovedAttribute("urljoin", "urlparse", "urllib.parse"),
- MovedAttribute("urlparse", "urlparse", "urllib.parse"),
- MovedAttribute("urlsplit", "urlparse", "urllib.parse"),
- MovedAttribute("urlunparse", "urlparse", "urllib.parse"),
- MovedAttribute("urlunsplit", "urlparse", "urllib.parse"),
- MovedAttribute("quote", "urllib", "urllib.parse"),
- MovedAttribute("quote_plus", "urllib", "urllib.parse"),
- MovedAttribute("unquote", "urllib", "urllib.parse"),
- MovedAttribute("unquote_plus", "urllib", "urllib.parse"),
- MovedAttribute("urlencode", "urllib", "urllib.parse"),
-]
-for attr in _urllib_parse_moved_attributes:
- setattr(Module_six_moves_urllib_parse, attr.name, attr)
-del attr
-
-sys.modules[__name__ + ".moves.urllib_parse"] = Module_six_moves_urllib_parse(__name__ + ".moves.urllib_parse")
-sys.modules[__name__ + ".moves.urllib.parse"] = Module_six_moves_urllib_parse(__name__ + ".moves.urllib.parse")
-
-
-class Module_six_moves_urllib_error(types.ModuleType):
- """Lazy loading of moved objects in six.moves.urllib_error"""
-
-
-_urllib_error_moved_attributes = [
- MovedAttribute("URLError", "urllib2", "urllib.error"),
- MovedAttribute("HTTPError", "urllib2", "urllib.error"),
- MovedAttribute("ContentTooShortError", "urllib", "urllib.error"),
-]
-for attr in _urllib_error_moved_attributes:
- setattr(Module_six_moves_urllib_error, attr.name, attr)
-del attr
-
-sys.modules[__name__ + ".moves.urllib_error"] = Module_six_moves_urllib_error(__name__ + ".moves.urllib_error")
-sys.modules[__name__ + ".moves.urllib.error"] = Module_six_moves_urllib_error(__name__ + ".moves.urllib.error")
-
-
-class Module_six_moves_urllib_request(types.ModuleType):
- """Lazy loading of moved objects in six.moves.urllib_request"""
-
-
-_urllib_request_moved_attributes = [
- MovedAttribute("urlopen", "urllib2", "urllib.request"),
- MovedAttribute("install_opener", "urllib2", "urllib.request"),
- MovedAttribute("build_opener", "urllib2", "urllib.request"),
- MovedAttribute("pathname2url", "urllib", "urllib.request"),
- MovedAttribute("url2pathname", "urllib", "urllib.request"),
- MovedAttribute("getproxies", "urllib", "urllib.request"),
- MovedAttribute("Request", "urllib2", "urllib.request"),
- MovedAttribute("OpenerDirector", "urllib2", "urllib.request"),
- MovedAttribute("HTTPDefaultErrorHandler", "urllib2", "urllib.request"),
- MovedAttribute("HTTPRedirectHandler", "urllib2", "urllib.request"),
- MovedAttribute("HTTPCookieProcessor", "urllib2", "urllib.request"),
- MovedAttribute("ProxyHandler", "urllib2", "urllib.request"),
- MovedAttribute("BaseHandler", "urllib2", "urllib.request"),
- MovedAttribute("HTTPPasswordMgr", "urllib2", "urllib.request"),
- MovedAttribute("HTTPPasswordMgrWithDefaultRealm", "urllib2", "urllib.request"),
- MovedAttribute("AbstractBasicAuthHandler", "urllib2", "urllib.request"),
- MovedAttribute("HTTPBasicAuthHandler", "urllib2", "urllib.request"),
- MovedAttribute("ProxyBasicAuthHandler", "urllib2", "urllib.request"),
- MovedAttribute("AbstractDigestAuthHandler", "urllib2", "urllib.request"),
- MovedAttribute("HTTPDigestAuthHandler", "urllib2", "urllib.request"),
- MovedAttribute("ProxyDigestAuthHandler", "urllib2", "urllib.request"),
- MovedAttribute("HTTPHandler", "urllib2", "urllib.request"),
- MovedAttribute("HTTPSHandler", "urllib2", "urllib.request"),
- MovedAttribute("FileHandler", "urllib2", "urllib.request"),
- MovedAttribute("FTPHandler", "urllib2", "urllib.request"),
- MovedAttribute("CacheFTPHandler", "urllib2", "urllib.request"),
- MovedAttribute("UnknownHandler", "urllib2", "urllib.request"),
- MovedAttribute("HTTPErrorProcessor", "urllib2", "urllib.request"),
- MovedAttribute("urlretrieve", "urllib", "urllib.request"),
- MovedAttribute("urlcleanup", "urllib", "urllib.request"),
- MovedAttribute("URLopener", "urllib", "urllib.request"),
- MovedAttribute("FancyURLopener", "urllib", "urllib.request"),
-]
-for attr in _urllib_request_moved_attributes:
- setattr(Module_six_moves_urllib_request, attr.name, attr)
-del attr
-
-sys.modules[__name__ + ".moves.urllib_request"] = Module_six_moves_urllib_request(__name__ + ".moves.urllib_request")
-sys.modules[__name__ + ".moves.urllib.request"] = Module_six_moves_urllib_request(__name__ + ".moves.urllib.request")
-
-
-class Module_six_moves_urllib_response(types.ModuleType):
- """Lazy loading of moved objects in six.moves.urllib_response"""
-
-
-_urllib_response_moved_attributes = [
- MovedAttribute("addbase", "urllib", "urllib.response"),
- MovedAttribute("addclosehook", "urllib", "urllib.response"),
- MovedAttribute("addinfo", "urllib", "urllib.response"),
- MovedAttribute("addinfourl", "urllib", "urllib.response"),
-]
-for attr in _urllib_response_moved_attributes:
- setattr(Module_six_moves_urllib_response, attr.name, attr)
-del attr
-
-sys.modules[__name__ + ".moves.urllib_response"] = Module_six_moves_urllib_response(__name__ + ".moves.urllib_response")
-sys.modules[__name__ + ".moves.urllib.response"] = Module_six_moves_urllib_response(__name__ + ".moves.urllib.response")
-
-
-class Module_six_moves_urllib_robotparser(types.ModuleType):
- """Lazy loading of moved objects in six.moves.urllib_robotparser"""
-
-
-_urllib_robotparser_moved_attributes = [
- MovedAttribute("RobotFileParser", "robotparser", "urllib.robotparser"),
-]
-for attr in _urllib_robotparser_moved_attributes:
- setattr(Module_six_moves_urllib_robotparser, attr.name, attr)
-del attr
-
-sys.modules[__name__ + ".moves.urllib_robotparser"] = Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib_robotparser")
-sys.modules[__name__ + ".moves.urllib.robotparser"] = Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib.robotparser")
-
-
-class Module_six_moves_urllib(types.ModuleType):
- """Create a six.moves.urllib namespace that resembles the Python 3 namespace"""
- parse = sys.modules[__name__ + ".moves.urllib_parse"]
- error = sys.modules[__name__ + ".moves.urllib_error"]
- request = sys.modules[__name__ + ".moves.urllib_request"]
- response = sys.modules[__name__ + ".moves.urllib_response"]
- robotparser = sys.modules[__name__ + ".moves.urllib_robotparser"]
-
-
-sys.modules[__name__ + ".moves.urllib"] = Module_six_moves_urllib(__name__ + ".moves.urllib")
-
-
-def add_move(move):
- """Add an item to six.moves."""
- setattr(_MovedItems, move.name, move)
-
-
-def remove_move(name):
- """Remove item from six.moves."""
- try:
- delattr(_MovedItems, name)
- except AttributeError:
- try:
- del moves.__dict__[name]
- except KeyError:
- raise AttributeError("no such move, %r" % (name,))
-
-
-if PY3:
- _meth_func = "__func__"
- _meth_self = "__self__"
-
- _func_closure = "__closure__"
- _func_code = "__code__"
- _func_defaults = "__defaults__"
- _func_globals = "__globals__"
-
- _iterkeys = "keys"
- _itervalues = "values"
- _iteritems = "items"
- _iterlists = "lists"
-else:
- _meth_func = "im_func"
- _meth_self = "im_self"
-
- _func_closure = "func_closure"
- _func_code = "func_code"
- _func_defaults = "func_defaults"
- _func_globals = "func_globals"
-
- _iterkeys = "iterkeys"
- _itervalues = "itervalues"
- _iteritems = "iteritems"
- _iterlists = "iterlists"
-
-
-try:
- advance_iterator = next
-except NameError:
- def advance_iterator(it):
- return it.next()
-next = advance_iterator
-
-
-try:
- callable = callable
-except NameError:
- def callable(obj):
- return any("__call__" in klass.__dict__ for klass in type(obj).__mro__)
-
-
-if PY3:
- def get_unbound_function(unbound):
- return unbound
-
- create_bound_method = types.MethodType
-
- Iterator = object
-else:
- def get_unbound_function(unbound):
- return unbound.im_func
-
- def create_bound_method(func, obj):
- return types.MethodType(func, obj, obj.__class__)
-
- class Iterator(object):
-
- def next(self):
- return type(self).__next__(self)
-
- callable = callable
-_add_doc(get_unbound_function,
- """Get the function out of a possibly unbound function""")
-
-
-get_method_function = operator.attrgetter(_meth_func)
-get_method_self = operator.attrgetter(_meth_self)
-get_function_closure = operator.attrgetter(_func_closure)
-get_function_code = operator.attrgetter(_func_code)
-get_function_defaults = operator.attrgetter(_func_defaults)
-get_function_globals = operator.attrgetter(_func_globals)
-
-
-def iterkeys(d, **kw):
- """Return an iterator over the keys of a dictionary."""
- return iter(getattr(d, _iterkeys)(**kw))
-
-def itervalues(d, **kw):
- """Return an iterator over the values of a dictionary."""
- return iter(getattr(d, _itervalues)(**kw))
-
-def iteritems(d, **kw):
- """Return an iterator over the (key, value) pairs of a dictionary."""
- return iter(getattr(d, _iteritems)(**kw))
-
-def iterlists(d, **kw):
- """Return an iterator over the (key, [values]) pairs of a dictionary."""
- return iter(getattr(d, _iterlists)(**kw))
-
-
-if PY3:
- def b(s):
- return s.encode("latin-1")
- def u(s):
- return s
- unichr = chr
- if sys.version_info[1] <= 1:
- def int2byte(i):
- return bytes((i,))
- else:
- # This is about 2x faster than the implementation above on 3.2+
- int2byte = operator.methodcaller("to_bytes", 1, "big")
- byte2int = operator.itemgetter(0)
- indexbytes = operator.getitem
- iterbytes = iter
- import io
- StringIO = io.StringIO
- BytesIO = io.BytesIO
-else:
- def b(s):
- return s
- def u(s):
- return unicode(s, "unicode_escape")
- unichr = unichr
- int2byte = chr
- def byte2int(bs):
- return ord(bs[0])
- def indexbytes(buf, i):
- return ord(buf[i])
- def iterbytes(buf):
- return (ord(byte) for byte in buf)
- import StringIO
- StringIO = BytesIO = StringIO.StringIO
-_add_doc(b, """Byte literal""")
-_add_doc(u, """Text literal""")
-
-
-if PY3:
- exec_ = getattr(moves.builtins, "exec")
-
-
- def reraise(tp, value, tb=None):
- if value.__traceback__ is not tb:
- raise value.with_traceback(tb)
- raise value
-
-else:
- def exec_(_code_, _globs_=None, _locs_=None):
- """Execute code in a namespace."""
- if _globs_ is None:
- frame = sys._getframe(1)
- _globs_ = frame.f_globals
- if _locs_ is None:
- _locs_ = frame.f_locals
- del frame
- elif _locs_ is None:
- _locs_ = _globs_
- exec("""exec _code_ in _globs_, _locs_""")
-
-
- exec_("""def reraise(tp, value, tb=None):
- raise tp, value, tb
-""")
-
-
-print_ = getattr(moves.builtins, "print", None)
-if print_ is None:
- def print_(*args, **kwargs):
- """The new-style print function for Python 2.4 and 2.5."""
- fp = kwargs.pop("file", sys.stdout)
- if fp is None:
- return
- def write(data):
- if not isinstance(data, basestring):
- data = str(data)
- # If the file has an encoding, encode unicode with it.
- if (isinstance(fp, file) and
- isinstance(data, unicode) and
- fp.encoding is not None):
- errors = getattr(fp, "errors", None)
- if errors is None:
- errors = "strict"
- data = data.encode(fp.encoding, errors)
- fp.write(data)
- want_unicode = False
- sep = kwargs.pop("sep", None)
- if sep is not None:
- if isinstance(sep, unicode):
- want_unicode = True
- elif not isinstance(sep, str):
- raise TypeError("sep must be None or a string")
- end = kwargs.pop("end", None)
- if end is not None:
- if isinstance(end, unicode):
- want_unicode = True
- elif not isinstance(end, str):
- raise TypeError("end must be None or a string")
- if kwargs:
- raise TypeError("invalid keyword arguments to print()")
- if not want_unicode:
- for arg in args:
- if isinstance(arg, unicode):
- want_unicode = True
- break
- if want_unicode:
- newline = unicode("\n")
- space = unicode(" ")
- else:
- newline = "\n"
- space = " "
- if sep is None:
- sep = space
- if end is None:
- end = newline
- for i, arg in enumerate(args):
- if i:
- write(sep)
- write(arg)
- write(end)
-
-_add_doc(reraise, """Reraise an exception.""")
-
-
-def with_metaclass(meta, *bases):
- """Create a base class with a metaclass."""
- return meta("NewBase", bases, {})
-
-def add_metaclass(metaclass):
- """Class decorator for creating a class with a metaclass."""
- def wrapper(cls):
- orig_vars = cls.__dict__.copy()
- orig_vars.pop('__dict__', None)
- orig_vars.pop('__weakref__', None)
- for slots_var in orig_vars.get('__slots__', ()):
- orig_vars.pop(slots_var)
- return metaclass(cls.__name__, cls.__bases__, orig_vars)
- return wrapper
diff --git a/futurize.py b/futurize.py
index b69785a2..09feaf59 100755
--- a/futurize.py
+++ b/futurize.py
@@ -3,23 +3,22 @@
futurize.py
===========
-Like Armin Ronacher's ``modernize.py``, but using the ``future`` package rather than a direct dependency on ``six``'.
+This script is only used by the unit tests. Another script called
+"futurize" is created automatically (without the .py extension) by
+setuptools.
-futurize.py attempts to turn Py2 code into valid, clean Py3 code that is also
-compatible with Py2 when using the ``future`` package.
+futurize.py attempts to turn Py2 code into valid, clean Py3 code that is
+also compatible with Py2 when using the ``future`` package.
Licensing
---------
-Copyright 2013 Python Charmers Pty Ltd, Australia.
+Copyright 2013-2024 Python Charmers, Australia.
The software is distributed under an MIT licence. See LICENSE.txt.
"""
-import os
+import sys
from libfuturize.main import main
-# We use os._exit() because sys.exit() seems to interact badly with
-# subprocess.check_output() ...
-os._exit(main())
-
+sys.exit(main())
diff --git a/libfuturize/fixes2/fix_raise.py b/libfuturize/fixes2/fix_raise.py
deleted file mode 100644
index 3e8323de..00000000
--- a/libfuturize/fixes2/fix_raise.py
+++ /dev/null
@@ -1,73 +0,0 @@
-"""Fixer for 'raise E, V'
-
-From Armin Ronacher's ``python-modernize``.
-
-raise -> raise
-raise E -> raise E
-raise E, V -> raise E(V)
-
-raise (((E, E'), E''), E'''), V -> raise E(V)
-
-
-CAVEATS:
-1) "raise E, V" will be incorrectly translated if V is an exception
- instance. The correct Python 3 idiom is
-
- raise E from V
-
- but since we can't detect instance-hood by syntax alone and since
- any client code would have to be changed as well, we don't automate
- this.
-"""
-# Author: Collin Winter, Armin Ronacher
-
-# Local imports
-from lib2to3 import pytree, fixer_base
-from lib2to3.pgen2 import token
-from lib2to3.fixer_util import Name, Call, is_tuple
-
-class FixRaise(fixer_base.BaseFix):
-
- BM_compatible = True
- PATTERN = """
- raise_stmt< 'raise' exc=any [',' val=any] >
- """
-
- def transform(self, node, results):
- syms = self.syms
-
- exc = results["exc"].clone()
- if exc.type == token.STRING:
- msg = "Python 3 does not support string exceptions"
- self.cannot_convert(node, msg)
- return
-
- # Python 2 supports
- # raise ((((E1, E2), E3), E4), E5), V
- # as a synonym for
- # raise E1, V
- # Since Python 3 will not support this, we recurse down any tuple
- # literals, always taking the first element.
- if is_tuple(exc):
- while is_tuple(exc):
- # exc.children[1:-1] is the unparenthesized tuple
- # exc.children[1].children[0] is the first element of the tuple
- exc = exc.children[1].children[0].clone()
- exc.prefix = u" "
-
- if "val" not in results:
- # One-argument raise
- new = pytree.Node(syms.raise_stmt, [Name(u"raise"), exc])
- new.prefix = node.prefix
- return new
-
- val = results["val"].clone()
- if is_tuple(val):
- args = [c.clone() for c in val.children[1:-1]]
- else:
- val.prefix = u""
- args = [val]
-
- return pytree.Node(syms.raise_stmt,
- [Name(u"raise"), Call(exc, args)],
- prefix=node.prefix)
diff --git a/libfuturize/fixes3/__init__.py b/libfuturize/fixes3/__init__.py
deleted file mode 100644
index adbf10d9..00000000
--- a/libfuturize/fixes3/__init__.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import sys
-from lib2to3 import refactor
-
-# The original set of these fixes comes from lib3to2 (https://bitbucket.org/amentajo/lib3to2):
-libfuturize_3fix_names = set([
- 'libfuturize.fixes2.fix_add__future__imports', # from __future__ import absolute_import etc. on separate lines
- 'libfuturize.fixes2.fix_add_future_standard_library_import', # we force adding this import for now, even if it doesn't seem necessary to the fix_future_standard_library fixer, for ease of testing
- 'libfuturize.fixes2.fix_order___future__imports', # consolidates to a single line to simplify testing
- 'libfuturize.fixes3.fix_future_builtins', # adds "from future.builtins import *"
- 'libfuturize.fixes2.fix_future_standard_library', # adds "from future import standard_library"
-
- 'libfuturize.fixes3.fix_annotations',
- # 'libfuturize.fixes3.fix_bitlength', # ints have this in Py2.7
- # 'libfuturize.fixes3.fix_bool', # need a decorator or Mixin
- # 'libfuturize.fixes3.fix_bytes', # leave bytes as bytes
- # 'libfuturize.fixes3.fix_classdecorator', # available in
- # Py2.6+
- # 'libfuturize.fixes3.fix_collections', hmmm ...
- # 'libfuturize.fixes3.fix_dctsetcomp', # avail in Py27
- 'libfuturize.fixes3.fix_division', # yes
- # 'libfuturize.fixes3.fix_except', # avail in Py2.6+
- # 'libfuturize.fixes3.fix_features', # ?
- 'libfuturize.fixes3.fix_fullargspec',
- # 'libfuturize.fixes3.fix_funcattrs',
- 'libfuturize.fixes3.fix_getcwd',
- 'libfuturize.fixes3.fix_imports', # adds "from future import standard_library"
- 'libfuturize.fixes3.fix_imports2',
- # 'libfuturize.fixes3.fix_input',
- # 'libfuturize.fixes3.fix_int',
- # 'libfuturize.fixes3.fix_intern',
- # 'libfuturize.fixes3.fix_itertools',
- 'libfuturize.fixes3.fix_kwargs', # yes, we want this
- # 'libfuturize.fixes3.fix_memoryview',
- # 'libfuturize.fixes3.fix_metaclass', # write a custom handler for
- # this
- # 'libfuturize.fixes3.fix_methodattrs', # __func__ and __self__ seem to be defined on Py2.7 already
- 'libfuturize.fixes3.fix_newstyle', # yes, we want this: explicit inheritance from object. Without new-style classes in Py2, super() will break etc.
- # 'libfuturize.fixes3.fix_next', # use a decorator for this
- # 'libfuturize.fixes3.fix_numliterals', # prob not
- # 'libfuturize.fixes3.fix_open', # huh?
- # 'libfuturize.fixes3.fix_print', # no way
- 'libfuturize.fixes3.fix_printfunction', # adds __future__ import print_function
- 'libfuturize.fixes3.fix_raise', # yes, if 'raise E, V, T' is supported on Py3
- # 'libfuturize.fixes3.fix_range', # nope
- # 'libfuturize.fixes3.fix_reduce',
- # 'libfuturize.fixes3.fix_setliteral',
- # 'libfuturize.fixes3.fix_str',
- # 'libfuturize.fixes3.fix_super', # maybe, if our magic super() isn't robust enough
- 'libfuturize.fixes3.fix_throw', # yes, if Py3 supports it
- # 'libfuturize.fixes3.fix_unittest',
- 'libfuturize.fixes3.fix_unpacking', # yes, this is useful
- # 'libfuturize.fixes3.fix_with' # way out of date
- ])
-
diff --git a/libfuturize/fixes3/fix_future_builtins.py b/libfuturize/fixes3/fix_future_builtins.py
deleted file mode 100644
index d8229806..00000000
--- a/libfuturize/fixes3/fix_future_builtins.py
+++ /dev/null
@@ -1,11 +0,0 @@
-"""
-For the ``future`` package.
-
-Adds this import line:
-
- from future.builtins import *
-
-after any other imports (in an initial block of them).
-"""
-
-from ..fixes2.fix_future_builtins import FixFutureBuiltins
diff --git a/libfuturize/fixes3/fix_newstyle.py b/libfuturize/fixes3/fix_newstyle.py
deleted file mode 100644
index 6420e94d..00000000
--- a/libfuturize/fixes3/fix_newstyle.py
+++ /dev/null
@@ -1,22 +0,0 @@
-u"""
-Fixer for "class Foo: ..." -> "class Foo(object): ..."
-"""
-
-from lib2to3 import fixer_base
-from lib2to3.fixer_util import Node, Leaf, token, syms, LParen, RParen, Name
-# from lib2to3.fixer_util import Name, syms, Node, Leaf, Newline, find_root
-from lib2to3.pygram import token
-
-def insert_object(node, idx):
- node.insert_child(idx, RParen())
- node.insert_child(idx, Name(u"object"))
- node.insert_child(idx, LParen())
-
-class FixNewstyle(fixer_base.BaseFix):
-
- PATTERN = u"classdef< 'class' NAME colon=':' any >"
-
- def transform(self, node, results):
- colon = results[u"colon"]
- idx = node.children.index(colon)
- insert_object(node, idx)
diff --git a/libfuturize/main.py b/libfuturize/main.py
deleted file mode 100644
index 6e5919c6..00000000
--- a/libfuturize/main.py
+++ /dev/null
@@ -1,221 +0,0 @@
-"""
-futurize: automatic conversion to clean 2&3 code using ``python-future``
-======================================================================
-
-Like Armin Ronacher's modernize.py, ``futurize`` attempts to produce clean
-standard Python 3 code that runs on both Py2 and Py3.
-
-One pass
---------
-
-Use it like this on Python 2 code:
-
- $ futurize --verbose mypython2script.py
-
-This will attempt to port the code to standard Py3 code that also
-provides Py2 compatibility with the help of the right imports from
-``future``. To write the changes to disk, use the -w flag.
-
-Or, to make existing Python 3 code compatible with both Python 2 and 3
-using the ``future`` package:
-
- $ futurize --from3 --verbose mypython3script.py
-
-which removes any Py3-only syntax (e.g. new metaclasses) and adds these
-import lines:
-
- from __future__ import (absolute_import, division,
- print_function, unicode_literals)
- from future import standard_library
- from future.builtins import *
-
-To write changes to the files, use the -w flag.
-
-Two stages
-----------
-
-The ``futurize`` script can also be called in two separate stages. First:
-
- $ futurize --stage1 mypython2script.py
-
-This produces more modern Python 2 code that is not yet compatible with Python
-3. The tests should still run and the diff should be uncontroversial to apply to
-most Python projects that are willing to drop support for Python 2.5 and lower.
-
-After this, the recommended approach is to explicitly mark all strings that must
-be byte-strings with a b'' prefix, and then invoke the second stage with:
-
- $ futurize --stage2 mypython2script.py
-
-This implicitly turns all unadorned string literals into unicode strings (Py3
-str) and makes the additional changes needed to support Python 3. This stage
-introduces a dependency on ``future`` to restore Py2 support.
-
-If you would prefer instead to mark all your text strings explicitly with u''
-prefixes and have all unadorned '' strings converted to byte-strings, use this:
-
- $ futurize --stage2 --tobytes mypython2script.py
-
-Note that this even includes docstrings.
-
-Separate stages are not available (or needed) when converting from Python 3.
-"""
-
-from __future__ import (absolute_import, print_function, unicode_literals)
-from future.builtins import *
-
-import sys
-import logging
-import optparse
-
-from lib2to3.main import main, warn, StdoutRefactoringTool
-from lib2to3 import refactor
-
-from libfuturize.fixes2 import (lib2to3_fix_names_stage1,
- lib2to3_fix_names_stage2,
- libfuturize_2fix_names_stage1,
- libfuturize_2fix_names_stage2)
-from libfuturize.fixes3 import libfuturize_3fix_names
-
-
-def main(args=None):
- """Main program.
-
- Returns a suggested exit status (0, 1, 2).
- """
- # Set up option parser
- parser = optparse.OptionParser(usage="futurize [options] file|dir ...")
- parser.add_option("-a", "--all-imports", action="store_true",
- help="Adds all __future__ and future imports to each module")
- parser.add_option("-d", "--doctests_only", action="store_true",
- help="Fix up doctests only")
- parser.add_option("-b", "--tobytes", action="store_true",
- help="Convert all unadorned string literals to bytes objects")
- parser.add_option("-1", "--stage1", action="store_true",
- help="Modernize Python 2 code only; no compatibility with Python 3 (or dependency on ``future``)")
- parser.add_option("-2", "--stage2", action="store_true",
- help="Take modernized (stage1) code and add a dependency on ``future`` to provide Py3 compatibility.")
- parser.add_option("-0", "--both-stages", action="store_true",
- help="Apply both stages 1 and 2")
- # parser.add_option("-f", "--fix", action="append", default=[],
- # help="Each FIX specifies a transformation; default: all")
- parser.add_option("-j", "--processes", action="store", default=1,
- type="int", help="Run 2to3 concurrently")
- parser.add_option("-x", "--nofix", action="append", default=[],
- help="Prevent a fixer from being run.")
- parser.add_option("-l", "--list-fixes", action="store_true",
- help="List available transformations")
- # parser.add_option("-p", "--print-function", action="store_true",
- # help="Modify the grammar so that print() is a function")
- parser.add_option("-v", "--verbose", action="store_true",
- help="More verbose logging")
- parser.add_option("--no-diffs", action="store_true",
- help="Don't show diffs of the refactoring")
- parser.add_option("-w", "--write", action="store_true",
- help="Write back modified files")
- parser.add_option("-n", "--nobackups", action="store_true", default=False,
- help="Don't write backups for modified files.")
- parser.add_option("--from3", action="store_true", default=False,
- help="Assume the code is already Python 3 and just "
- "requires ``__future__`` and ``future`` imports.")
-
- # Parse command line arguments
- refactor_stdin = False
- flags = {}
- options, args = parser.parse_args(args)
- if options.from3:
- assert not (options.stage1 or options.stage2)
- assert not options.tobytes
- fixer_pkg = 'libfuturize.fixes3'
- avail_fixes = libfuturize_3fix_names
- flags["print_function"] = True
- else:
- fixer_pkg = 'libfuturize.fixes2'
- avail_fixes = set()
- if not (options.stage1 or options.stage2):
- options.both_stages = True
- else:
- assert options.both_stages is None
- options.both_stages = False
- if options.stage1 or options.both_stages:
- avail_fixes.update(lib2to3_fix_names_stage1)
- avail_fixes.update(libfuturize_2fix_names_stage1)
- if options.stage2 or options.both_stages:
- avail_fixes.update(lib2to3_fix_names_stage2)
- avail_fixes.update(libfuturize_2fix_names_stage2)
-
- if options.tobytes:
- avail_fixes.add('libfuturize.fixes2.fix_bytes')
- if not options.write and options.no_diffs:
- warn("not writing files and not printing diffs; that's not very useful")
- if not options.write and options.nobackups:
- parser.error("Can't use -n without -w")
- if options.list_fixes:
- print("Available transformations for the -f/--fix option:")
- for fixname in sorted(avail_fixes):
- print(fixname)
- if not args:
- return 0
- if not args:
- print("At least one file or directory argument required.",
- file=sys.stderr)
- print("Use --help to show usage.", file=sys.stderr)
- return 2
- if "-" in args:
- refactor_stdin = True
- if options.write:
- print("Can't write to stdin.", file=sys.stderr)
- return 2
-
- # If this option were ever needed, it would probably mean the --from3 flag
- # had been forgotten.
- # if options.print_function:
- # flags["print_function"] = True
-
- # Set up logging handler
- level = logging.DEBUG if options.verbose else logging.INFO
- logging.basicConfig(format='%(name)s: %(message)s', level=level)
-
- # Initialize the refactoring tool
- unwanted_fixes = set(fixer_pkg + ".fix_" + fix for fix in options.nofix)
-
- # The 'all-imports' option forces adding all imports __future__ and "from
- # future import standard_library", even if they don't seem necessary for
- # the current state of each module. (This can simplify testing, and can
- # reduce the need to think about Py2 compatibility when editing the code
- # further.)
- extra_fixes = set()
- if options.all_imports:
- prefix = 'libfuturize.fixes2.'
- if options.stage1:
- extra_fixes.add(prefix +
- 'fix_add__future__imports_except_unicode_literals')
- else:
- # In case the user hasn't run stage1 for some reason:
- extra_fixes.add(prefix + 'fix_add__future__imports')
- extra_fixes.add(prefix + 'fix_add_future_standard_library_import')
- extra_fixes.add(prefix + 'fix_add_all_future_builtins')
-
- fixer_names = avail_fixes | extra_fixes - unwanted_fixes
-
- rt = StdoutRefactoringTool(sorted(fixer_names), flags, set(),
- options.nobackups, not options.no_diffs)
-
- # Refactor all files and directories passed as arguments
- if not rt.errors:
- if refactor_stdin:
- rt.refactor_stdin()
- else:
- try:
- rt.refactor(args, options.write, options.doctests_only,
- options.processes)
- except refactor.MultiprocessingUnsupported:
- assert options.processes > 1
- print("Sorry, -j isn't " \
- "supported on this platform.", file=sys.stderr)
- return 1
- rt.summarize()
-
- # Return error status (0 if rt.errors is zero)
- return int(bool(rt.errors))
-
diff --git a/libfuturize/test_scripts/py2/check_super_2to3.py b/libfuturize/test_scripts/py2/check_super_2to3.py
deleted file mode 100644
index f46e03df..00000000
--- a/libfuturize/test_scripts/py2/check_super_2to3.py
+++ /dev/null
@@ -1,11 +0,0 @@
-"""
-This tests whether futurize keeps the old two-argument super() calls the
-same as before. It should, because this still works in Py3.
-"""
-from __future__ import print_function
-from future.builtins import *
-
-class VerboseList(list):
- def append(self, item):
- print('Adding an item')
- super(VerboseList, self).append(item)
diff --git a/libfuturize/test_scripts/py2/check_super_2to3.py2 b/libfuturize/test_scripts/py2/check_super_2to3.py2
deleted file mode 100644
index 65b24354..00000000
--- a/libfuturize/test_scripts/py2/check_super_2to3.py2
+++ /dev/null
@@ -1,9 +0,0 @@
-"""
-This tests whether futurize keeps the old two-argument super() calls the
-same as before. It should, because this still works in Py3.
-"""
-
-class VerboseList(list):
- def append(self, item):
- print 'Adding an item'
- super(VerboseList, self).append(item)
diff --git a/libfuturize/test_scripts/py2/implicit_relative_import.py b/libfuturize/test_scripts/py2/implicit_relative_import.py
deleted file mode 100644
index 6653d744..00000000
--- a/libfuturize/test_scripts/py2/implicit_relative_import.py
+++ /dev/null
@@ -1,8 +0,0 @@
-'''
-Tests whether implicit relative imports are turned into explicit ones.
-'''
-
-from __future__ import absolute_import
-from future.builtins import *
-
-from . import xrange
diff --git a/libfuturize/test_scripts/py2/implicit_relative_import.py2 b/libfuturize/test_scripts/py2/implicit_relative_import.py2
deleted file mode 100644
index 3af07a00..00000000
--- a/libfuturize/test_scripts/py2/implicit_relative_import.py2
+++ /dev/null
@@ -1,5 +0,0 @@
-'''
-Tests whether implicit relative imports are turned into explicit ones.
-'''
-
-import xrange
diff --git a/libfuturize/test_scripts/py2/old_exception_print.py b/libfuturize/test_scripts/py2/old_exception_print.py
deleted file mode 100644
index 2e90958d..00000000
--- a/libfuturize/test_scripts/py2/old_exception_print.py
+++ /dev/null
@@ -1,12 +0,0 @@
-"""
-Test of whether futurize handles the old-style exception Syntax
-"""
-
-from __future__ import print_function
-from future.builtins import *
-
-def hello():
- try:
- print("Hello, world")
- except IOError as e:
- print(e.errno)
diff --git a/libfuturize/test_scripts/py2/old_exception_print.py2 b/libfuturize/test_scripts/py2/old_exception_print.py2
deleted file mode 100644
index cc8621d5..00000000
--- a/libfuturize/test_scripts/py2/old_exception_print.py2
+++ /dev/null
@@ -1,8 +0,0 @@
-"""
-Test of whether futurize handles the old-style exception Syntax
-"""
-def hello():
- try:
- print "Hello, world"
- except IOError, e:
- print e.errno
diff --git a/libfuturize/test_scripts/py2/print_range.py b/libfuturize/test_scripts/py2/print_range.py
deleted file mode 100644
index ff0b2c61..00000000
--- a/libfuturize/test_scripts/py2/print_range.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from __future__ import print_function
-from future.builtins import *
-
-print(list(range(10)))
diff --git a/libfuturize/test_scripts/py2/print_range.py2 b/libfuturize/test_scripts/py2/print_range.py2
deleted file mode 100644
index 78e3caa1..00000000
--- a/libfuturize/test_scripts/py2/print_range.py2
+++ /dev/null
@@ -1 +0,0 @@
-print range(10)
diff --git a/libfuturize/test_scripts/py2/print_raw_input.py b/libfuturize/test_scripts/py2/print_raw_input.py
deleted file mode 100644
index c477c7be..00000000
--- a/libfuturize/test_scripts/py2/print_raw_input.py
+++ /dev/null
@@ -1,15 +0,0 @@
-"""
-Example Python 2 code with print statement and raw_input().
-
-Check: does libfuturize automatically handle this?
-"""
-
-from __future__ import print_function
-from future.builtins import *
-
-def greet(name):
- print("Hello, {0}!".format(name))
-
-print("What's your name?")
-name = input()
-greet(name)
diff --git a/libfuturize/test_scripts/py2/print_raw_input.py2 b/libfuturize/test_scripts/py2/print_raw_input.py2
deleted file mode 100644
index d0164912..00000000
--- a/libfuturize/test_scripts/py2/print_raw_input.py2
+++ /dev/null
@@ -1,12 +0,0 @@
-"""
-Example Python 2 code with print statement and raw_input().
-
-Check: does libfuturize automatically handle this?
-"""
-
-def greet(name):
- print "Hello, {0}!".format(name)
-
-print "What's your name?"
-name = raw_input()
-greet(name)
diff --git a/libfuturize/test_scripts/py2/print_stderr.py b/libfuturize/test_scripts/py2/print_stderr.py
deleted file mode 100644
index 944f9462..00000000
--- a/libfuturize/test_scripts/py2/print_stderr.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from __future__ import print_function
-from future.builtins import *
-
-import sys
-print('Hello', ' Ed', file=sys.stderr)
-
diff --git a/libfuturize/test_scripts/py2/print_stderr.py2 b/libfuturize/test_scripts/py2/print_stderr.py2
deleted file mode 100644
index 6ab90340..00000000
--- a/libfuturize/test_scripts/py2/print_stderr.py2
+++ /dev/null
@@ -1,3 +0,0 @@
-import sys
-print >> sys.stderr, 'Hello', ' Ed'
-
diff --git a/libfuturize/test_scripts/py2/problematic_string.py b/libfuturize/test_scripts/py2/problematic_string.py
deleted file mode 100644
index e8cc6399..00000000
--- a/libfuturize/test_scripts/py2/problematic_string.py
+++ /dev/null
@@ -1,5 +0,0 @@
-r""" This docstring generates a SyntaxError on Python 3 unless it has
-an r prefix.
-
-The folder is "C:\Users".
-"""
diff --git a/libfuturize/test_scripts/py2/problematic_string.py2 b/libfuturize/test_scripts/py2/problematic_string.py2
deleted file mode 100644
index e0784abc..00000000
--- a/libfuturize/test_scripts/py2/problematic_string.py2
+++ /dev/null
@@ -1,5 +0,0 @@
-""" This docstring generates a SyntaxError on Python 3 unless it has
-an r prefix.
-
-The folder is "C:\Users".
-"""
diff --git a/libfuturize/test_scripts/py2/source_coding_latin1.py b/libfuturize/test_scripts/py2/source_coding_latin1.py
deleted file mode 100644
index 9630515e..00000000
--- a/libfuturize/test_scripts/py2/source_coding_latin1.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# -*- coding: latin-1 -*-
-'''
-Tests to ensure that the source coding line is not corrupted or removed.
-Also tests whether the unicode characters in this encoding are parsed
-correctly and left alone.
-'''
-
-characters = [u"", u"", ""]
diff --git a/libfuturize/test_scripts/py2/source_coding_latin1.py2 b/libfuturize/test_scripts/py2/source_coding_latin1.py2
deleted file mode 100644
index 9630515e..00000000
--- a/libfuturize/test_scripts/py2/source_coding_latin1.py2
+++ /dev/null
@@ -1,8 +0,0 @@
-# -*- coding: latin-1 -*-
-'''
-Tests to ensure that the source coding line is not corrupted or removed.
-Also tests whether the unicode characters in this encoding are parsed
-correctly and left alone.
-'''
-
-characters = [u"", u"", ""]
diff --git a/libfuturize/test_scripts/py2/source_coding_utf8.py b/libfuturize/test_scripts/py2/source_coding_utf8.py
deleted file mode 100644
index 4fefa76d..00000000
--- a/libfuturize/test_scripts/py2/source_coding_utf8.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# -*- coding: utf-8 -*-
-'''
-Tests to ensure that the source coding line is not corrupted or removed.
-Also tests whether the unicode characters in this encoding are parsed
-correctly and left alone.
-'''
-
-icons = [u"◐", u"◓", u"◑", u"◒"]
diff --git a/libfuturize/test_scripts/py2/source_coding_utf8.py2 b/libfuturize/test_scripts/py2/source_coding_utf8.py2
deleted file mode 100644
index 4fefa76d..00000000
--- a/libfuturize/test_scripts/py2/source_coding_utf8.py2
+++ /dev/null
@@ -1,8 +0,0 @@
-# -*- coding: utf-8 -*-
-'''
-Tests to ensure that the source coding line is not corrupted or removed.
-Also tests whether the unicode characters in this encoding are parsed
-correctly and left alone.
-'''
-
-icons = [u"◐", u"◓", u"◑", u"◒"]
diff --git a/libfuturize/test_scripts/py2/stdlib_newstyleclass_printstatement.py b/libfuturize/test_scripts/py2/stdlib_newstyleclass_printstatement.py
deleted file mode 100644
index 2f9a52f4..00000000
--- a/libfuturize/test_scripts/py2/stdlib_newstyleclass_printstatement.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from __future__ import print_function
-from future import standard_library
-from future.builtins import *
-
-import configparser
-
-class Blah(object):
- pass
-print('Hello', end=None)
diff --git a/libfuturize/test_scripts/py2/stdlib_newstyleclass_printstatement.py2 b/libfuturize/test_scripts/py2/stdlib_newstyleclass_printstatement.py2
deleted file mode 100644
index 42089aac..00000000
--- a/libfuturize/test_scripts/py2/stdlib_newstyleclass_printstatement.py2
+++ /dev/null
@@ -1,5 +0,0 @@
-import ConfigParser
-
-class Blah(object):
- pass
-print 'Hello',
diff --git a/libfuturize/test_scripts/py2/stdlib_print_statement.py b/libfuturize/test_scripts/py2/stdlib_print_statement.py
deleted file mode 100644
index 23374c77..00000000
--- a/libfuturize/test_scripts/py2/stdlib_print_statement.py
+++ /dev/null
@@ -1,7 +0,0 @@
-from __future__ import absolute_import
-from __future__ import print_function
-from future import standard_library
-from future.builtins import *
-
-import socketserver
-print('blah')
diff --git a/libfuturize/test_scripts/py2/stdlib_print_statement.py2 b/libfuturize/test_scripts/py2/stdlib_print_statement.py2
deleted file mode 100644
index 8d7d13ea..00000000
--- a/libfuturize/test_scripts/py2/stdlib_print_statement.py2
+++ /dev/null
@@ -1,2 +0,0 @@
-import SocketServer
-print 'blah'
diff --git a/libfuturize/test_scripts/py2/stdlib_print_statement___future___absolute_import.py b/libfuturize/test_scripts/py2/stdlib_print_statement___future___absolute_import.py
deleted file mode 100644
index 550abcf1..00000000
--- a/libfuturize/test_scripts/py2/stdlib_print_statement___future___absolute_import.py
+++ /dev/null
@@ -1,10 +0,0 @@
-"""
-Tests whether the existing __future__ statement is preserved and not
-duplicated or moved below some executable statement.
-"""
-
-from __future__ import absolute_import
-from __future__ import print_function
-
-import socketserver
-print('blah')
diff --git a/libfuturize/test_scripts/py2/stdlib_print_statement___future___absolute_import.py2 b/libfuturize/test_scripts/py2/stdlib_print_statement___future___absolute_import.py2
deleted file mode 100644
index 17177cdc..00000000
--- a/libfuturize/test_scripts/py2/stdlib_print_statement___future___absolute_import.py2
+++ /dev/null
@@ -1,7 +0,0 @@
-"""
-Tests whether the existing __future__ statement is preserved and not
-duplicated or moved below some executable statement.
-"""
-
-import SocketServer
-print 'blah'
diff --git a/libfuturize/test_scripts/py2/unicode_literals.py b/libfuturize/test_scripts/py2/unicode_literals.py
deleted file mode 100644
index d82a82a5..00000000
--- a/libfuturize/test_scripts/py2/unicode_literals.py
+++ /dev/null
@@ -1,11 +0,0 @@
-'''
-Tests to ensure that the u'' and b'' prefixes on unicode and byte strings
-are not removed. Removing the prefixes on Py3.3+ is unnecessary and
-loses some information -- namely, that the strings have explicitly been
-marked as unicode, rather than just our guess (perhaps incorrect) that
-they should be unicode or bytes.
-'''
-
-s = u'mystring'
-b = b'mybytes'
-icons = [u"◐", u"◓", u"◑", u"◒"]
diff --git a/libfuturize/test_scripts/py2/unicode_literals.py2 b/libfuturize/test_scripts/py2/unicode_literals.py2
deleted file mode 100644
index d82a82a5..00000000
--- a/libfuturize/test_scripts/py2/unicode_literals.py2
+++ /dev/null
@@ -1,11 +0,0 @@
-'''
-Tests to ensure that the u'' and b'' prefixes on unicode and byte strings
-are not removed. Removing the prefixes on Py3.3+ is unnecessary and
-loses some information -- namely, that the strings have explicitly been
-marked as unicode, rather than just our guess (perhaps incorrect) that
-they should be unicode or bytes.
-'''
-
-s = u'mystring'
-b = b'mybytes'
-icons = [u"◐", u"◓", u"◑", u"◒"]
diff --git a/libfuturize/test_scripts/py2/xrange.py b/libfuturize/test_scripts/py2/xrange.py
deleted file mode 100644
index a3379b7f..00000000
--- a/libfuturize/test_scripts/py2/xrange.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from future.builtins import *
-
-for i in range(10):
- pass
diff --git a/libfuturize/test_scripts/py2/xrange.py2 b/libfuturize/test_scripts/py2/xrange.py2
deleted file mode 100644
index 3126104c..00000000
--- a/libfuturize/test_scripts/py2/xrange.py2
+++ /dev/null
@@ -1,3 +0,0 @@
-
-for i in xrange(10):
- pass
diff --git a/libfuturize/test_scripts/py3/example_py3_raw_input.py b/libfuturize/test_scripts/py3/example_py3_raw_input.py
deleted file mode 100644
index d294aa7d..00000000
--- a/libfuturize/test_scripts/py3/example_py3_raw_input.py
+++ /dev/null
@@ -1,14 +0,0 @@
-"""
-Example Python 3 code
-
-Does libfuturize --from3 handle this, or does it add an evil eval() to the
-input() call?
-
-It should also add 'from __future__ import print_function'
-"""
-def greet(name):
- print("Hello, {0}!".format(name))
-
-print("What's your name?")
-name = input()
-greet(name)
diff --git a/libfuturize/test_scripts/py3/implrelimport_printfunction.py b/libfuturize/test_scripts/py3/implrelimport_printfunction.py
deleted file mode 100644
index bf96e8e1..00000000
--- a/libfuturize/test_scripts/py3/implrelimport_printfunction.py
+++ /dev/null
@@ -1,3 +0,0 @@
-import emptymodule
-print('Hello', ' Ed', file=sys.stderr)
-
diff --git a/libfuturize/test_scripts/py3/imports_with_existing___future__import.py b/libfuturize/test_scripts/py3/imports_with_existing___future__import.py
deleted file mode 100644
index 2d05bceb..00000000
--- a/libfuturize/test_scripts/py3/imports_with_existing___future__import.py
+++ /dev/null
@@ -1,22 +0,0 @@
-"""
-An example Python 3 script with an existing __future__ import.
-We don't want libfuturize to clobber or duplicate this ...
-"""
-
-from __future__ import absolute_import
-
-import urllib.parse
-import urllib.request
-import urllib.error
-import http.client
-import email.message
-import io
-import unittest
-from test import support
-import os
-import sys
-import tempfile
-
-from base64 import b64encode
-import collections
-
diff --git a/libfuturize/test_scripts/py3/kwonlyargs.py b/libfuturize/test_scripts/py3/kwonlyargs.py
deleted file mode 100644
index 686eec8a..00000000
--- a/libfuturize/test_scripts/py3/kwonlyargs.py
+++ /dev/null
@@ -1,3 +0,0 @@
-def f(a, b, *, c=True, d='blah'):
- pass
-
diff --git a/libfuturize/test_scripts/py3/print_range.py b/libfuturize/test_scripts/py3/print_range.py
deleted file mode 100644
index 61b4000c..00000000
--- a/libfuturize/test_scripts/py3/print_range.py
+++ /dev/null
@@ -1 +0,0 @@
-print(range(10))
diff --git a/libfuturize/test_scripts/py3/stdlib_newstyleclass_printfunction.py b/libfuturize/test_scripts/py3/stdlib_newstyleclass_printfunction.py
deleted file mode 100644
index cc5763b9..00000000
--- a/libfuturize/test_scripts/py3/stdlib_newstyleclass_printfunction.py
+++ /dev/null
@@ -1,5 +0,0 @@
-import configparser
-
-class Blah:
- pass
-print('Hello', end=None)
diff --git a/libfuturize/test_scripts/py3/test_py3_urllib_request.py b/libfuturize/test_scripts/py3/test_py3_urllib_request.py
deleted file mode 100644
index 2051338b..00000000
--- a/libfuturize/test_scripts/py3/test_py3_urllib_request.py
+++ /dev/null
@@ -1,18 +0,0 @@
-"""
-Example Python 3 code using the new urllib.request module.
-
-Does libfuturize handle this?
-"""
-URL = 'http://pypi.python.org/pypi/{}/json'
-
-package = 'future'
-
-import pprint
-# import requests
-#
-# r = requests.get(URL.format(package))
-# pprint.pprint(r.json())
-
-import urllib.request
-r = urllib.request.urlopen(URL.format(package_name))
-pprint.pprint(r.read())
diff --git a/pasteurize.py b/pasteurize.py
new file mode 100755
index 00000000..658955f6
--- /dev/null
+++ b/pasteurize.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+"""
+pasteurize.py
+=============
+
+This script is only used by the unit tests. Another script called "pasteurize"
+is created automatically (without the .py extension) by setuptools.
+
+pasteurize.py attempts to turn Py3 code into relatively clean Py3 code that is
+also compatible with Py2 when using the ``future`` package.
+
+
+Licensing
+---------
+Copyright 2013-2024 Python Charmers, Australia.
+The software is distributed under an MIT licence. See LICENSE.txt.
+"""
+
+import sys
+
+from libpasteurize.main import main
+
+sys.exit(main())
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 00000000..649908f0
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,3 @@
+# py.test config file
+[pytest]
+norecursedirs = build docs/_build disabled_test_email disabled_test_xmlrpc disabled_test_xmlrpcnet disabled/* disabled* disabled/test_email/*
diff --git a/requirements_py26.txt b/requirements_py26.txt
deleted file mode 100644
index b2ed2669..00000000
--- a/requirements_py26.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-unittest2
-argparse # for the http.server module
-
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 00000000..498ec14a
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,2 @@
+[metadata]
+license_file = LICENSE.txt
diff --git a/setup.py b/setup.py
old mode 100644
new mode 100755
index 47783245..13b0f435
--- a/setup.py
+++ b/setup.py
@@ -1,82 +1,184 @@
#!/usr/bin/env python
+from __future__ import absolute_import, print_function
+
import os
+import os.path
import sys
+
+
try:
from setuptools import setup
except ImportError:
from distutils.core import setup
-import future
-
-
-if sys.argv[-1] == 'publish':
- os.system('python setup.py sdist upload')
- sys.exit()
NAME = "future"
PACKAGES = ["future",
"future.builtins",
- "future.builtins.backports",
- "future.tests",
+ "future.types",
"future.standard_library",
- "future.standard_library.html",
- "future.standard_library.http",
- "future.standard_library.test",
+ "future.backports",
+ "future.backports.email",
+ "future.backports.email.mime",
+ "future.backports.html",
+ "future.backports.http",
+ "future.backports.test",
+ "future.backports.urllib",
+ "future.backports.xmlrpc",
+ "future.moves",
+ "future.moves.dbm",
+ "future.moves.html",
+ "future.moves.http",
+ "future.moves.test",
+ "future.moves.tkinter",
+ "future.moves.urllib",
+ "future.moves.xmlrpc",
+ "future.tests", # for future.tests.base
+ # "future.tests.test_email",
"future.utils",
+ "past",
+ "past.builtins",
+ "past.types",
+ "past.utils",
+ "past.translation",
"libfuturize",
- "libfuturize.fixes2",
- "libfuturize.fixes3"]
+ "libfuturize.fixes",
+ "libpasteurize",
+ "libpasteurize.fixes",
+ ]
+
+# PEP 3108 stdlib moves:
+if sys.version_info[:2] < (3, 0):
+ PACKAGES += [
+ "builtins",
+ # "configparser", # removed in v0.16.0
+ "copyreg",
+ "html",
+ "http",
+ "queue",
+ "reprlib",
+ "socketserver",
+ "tkinter",
+ "winreg",
+ "xmlrpc",
+ "_dummy_thread",
+ "_markupbase",
+ "_thread",
+ ]
+
PACKAGE_DATA = {'': [
'README.rst',
'LICENSE.txt',
'futurize.py',
- 'discover_tests.py',
- 'check_rst.sh'
- ]}
-REQUIRES = []
-VERSION = future.__version__
+ 'pasteurize.py',
+ 'check_rst.sh',
+ 'TESTING.txt',
+ ],
+ 'tests': ['*.py'],
+ }
+
+import src.future
+VERSION = src.future.__version__
DESCRIPTION = "Clean single-source support for Python 3 and 2"
-LONG_DESC = future.__doc__
+LONG_DESC = src.future.__doc__
AUTHOR = "Ed Schofield"
AUTHOR_EMAIL = "ed@pythoncharmers.com"
-URL="https://github.com/PythonCharmers/python-future"
+URL="https://python-future.org"
LICENSE = "MIT"
-KEYWORDS = "future python3 migration backport six 2to3 futurize modernize"
+KEYWORDS = "future past python3 migration futurize backport six 2to3 modernize pasteurize 3to2"
CLASSIFIERS = [
"Programming Language :: Python",
+ "Programming Language :: Python :: 2",
"Programming Language :: Python :: 2.6",
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.3",
+ "Programming Language :: Python :: 3.4",
+ "Programming Language :: Python :: 3.5",
+ "Programming Language :: Python :: 3.6",
+ "Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
"License :: OSI Approved",
"License :: OSI Approved :: MIT License",
- "Development Status :: 4 - Beta",
+ "Development Status :: 6 - Mature",
"Intended Audience :: Developers",
]
setup_kwds = {}
+
+# * Important *
+# We forcibly remove the build folder to avoid breaking the
+# user's Py3 installation if they run "python2 setup.py
+# build" and then "python3 setup.py install".
+
+try:
+ # If the user happens to run:
+ # python2 setup.py build
+ # python3 setup.py install
+ # then folders like "copyreg" will be in build/lib.
+ # If so, we CANNOT let the user install this, because
+ # this may break his/her Python 3 install, depending on the folder order in
+ # sys.path. (Running "import html" etc. may pick up our Py2
+ # substitute packages, instead of the intended system stdlib modules.)
+ SYSTEM_MODULES = set([
+ '_dummy_thread',
+ '_markupbase',
+ '_thread',
+ 'builtins',
+ # Catch the case that configparser is in the build folder
+ # from a previous version of `future`:
+ 'configparser',
+ 'copyreg',
+ 'html',
+ 'http',
+ 'queue',
+ 'reprlib',
+ 'socketserver',
+ 'tkinter',
+ 'winreg',
+ 'xmlrpc'
+ ])
+
+ if sys.version_info[0] >= 3:
+ # Do any of the above folders exist in build/lib?
+ files = os.listdir(os.path.join('build', 'lib'))
+ if len(set(files) & set(SYSTEM_MODULES)) > 0:
+ print('ERROR: Your build folder is in an inconsistent state for '
+ 'a Python 3.x install. Please remove it manually and run '
+ 'setup.py again.', file=sys.stderr)
+ sys.exit(1)
+except OSError:
+ pass
+
setup(name=NAME,
version=VERSION,
author=AUTHOR,
author_email=AUTHOR_EMAIL,
url=URL,
+ project_urls={
+ 'Source': 'https://github.com/PythonCharmers/python-future',
+ },
description=DESCRIPTION,
long_description=LONG_DESC,
license=LICENSE,
keywords=KEYWORDS,
entry_points={
'console_scripts': [
- 'futurize = libfuturize.main:main'
+ 'futurize = libfuturize.main:main',
+ 'pasteurize = libpasteurize.main:main'
]
},
+ package_dir={'': 'src'},
packages=PACKAGES,
package_data=PACKAGE_DATA,
include_package_data=True,
- install_requires=REQUIRES,
+ python_requires=">=2.6, !=3.0.*, !=3.1.*, !=3.2.*",
classifiers=CLASSIFIERS,
- test_suite = "discover_tests",
**setup_kwds
)
-
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 00000000..acdbb31a
--- /dev/null
+++ b/src/__init__.py
@@ -0,0 +1,2 @@
+# Make this a package only for the sake of importing
+# src.future.__version__ etc. from setup.py
diff --git a/src/_dummy_thread/__init__.py b/src/_dummy_thread/__init__.py
new file mode 100644
index 00000000..63dced6e
--- /dev/null
+++ b/src/_dummy_thread/__init__.py
@@ -0,0 +1,10 @@
+from __future__ import absolute_import
+import sys
+__future_module__ = True
+
+if sys.version_info[0] < 3:
+ from dummy_thread import *
+else:
+ raise ImportError('This package should not be accessible on Python 3. '
+ 'Either you are trying to run from the python-future src folder '
+ 'or your installation of python-future is corrupted.')
diff --git a/src/_markupbase/__init__.py b/src/_markupbase/__init__.py
new file mode 100644
index 00000000..29090654
--- /dev/null
+++ b/src/_markupbase/__init__.py
@@ -0,0 +1,10 @@
+from __future__ import absolute_import
+import sys
+__future_module__ = True
+
+if sys.version_info[0] < 3:
+ from markupbase import *
+else:
+ raise ImportError('This package should not be accessible on Python 3. '
+ 'Either you are trying to run from the python-future src folder '
+ 'or your installation of python-future is corrupted.')
diff --git a/src/_thread/__init__.py b/src/_thread/__init__.py
new file mode 100644
index 00000000..9f2a51c7
--- /dev/null
+++ b/src/_thread/__init__.py
@@ -0,0 +1,10 @@
+from __future__ import absolute_import
+import sys
+__future_module__ = True
+
+if sys.version_info[0] < 3:
+ from thread import *
+else:
+ raise ImportError('This package should not be accessible on Python 3. '
+ 'Either you are trying to run from the python-future src folder '
+ 'or your installation of python-future is corrupted.')
diff --git a/src/builtins/__init__.py b/src/builtins/__init__.py
new file mode 100644
index 00000000..4f936f28
--- /dev/null
+++ b/src/builtins/__init__.py
@@ -0,0 +1,12 @@
+from __future__ import absolute_import
+import sys
+__future_module__ = True
+
+if sys.version_info[0] < 3:
+ from __builtin__ import *
+ # Overwrite any old definitions with the equivalent future.builtins ones:
+ from future.builtins import *
+else:
+ raise ImportError('This package should not be accessible on Python 3. '
+ 'Either you are trying to run from the python-future src folder '
+ 'or your installation of python-future is corrupted.')
diff --git a/src/copyreg/__init__.py b/src/copyreg/__init__.py
new file mode 100644
index 00000000..51bd4b9a
--- /dev/null
+++ b/src/copyreg/__init__.py
@@ -0,0 +1,9 @@
+from __future__ import absolute_import
+import sys
+
+if sys.version_info[0] < 3:
+ from copy_reg import *
+else:
+ raise ImportError('This package should not be accessible on Python 3. '
+ 'Either you are trying to run from the python-future src folder '
+ 'or your installation of python-future is corrupted.')
diff --git a/src/future/__init__.py b/src/future/__init__.py
new file mode 100644
index 00000000..b097fd81
--- /dev/null
+++ b/src/future/__init__.py
@@ -0,0 +1,92 @@
+"""
+future: Easy, safe support for Python 2/3 compatibility
+=======================================================
+
+``future`` is the missing compatibility layer between Python 2 and Python
+3. It allows you to use a single, clean Python 3.x-compatible codebase to
+support both Python 2 and Python 3 with minimal overhead.
+
+It is designed to be used as follows::
+
+ from __future__ import (absolute_import, division,
+ print_function, unicode_literals)
+ from builtins import (
+ bytes, dict, int, list, object, range, str,
+ ascii, chr, hex, input, next, oct, open,
+ pow, round, super,
+ filter, map, zip)
+
+followed by predominantly standard, idiomatic Python 3 code that then runs
+similarly on Python 2.6/2.7 and Python 3.3+.
+
+The imports have no effect on Python 3. On Python 2, they shadow the
+corresponding builtins, which normally have different semantics on Python 3
+versus 2, to provide their Python 3 semantics.
+
+
+Standard library reorganization
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+``future`` supports the standard library reorganization (PEP 3108) through the
+following Py3 interfaces:
+
+ >>> # Top-level packages with Py3 names provided on Py2:
+ >>> import html.parser
+ >>> import queue
+ >>> import tkinter.dialog
+ >>> import xmlrpc.client
+ >>> # etc.
+
+ >>> # Aliases provided for extensions to existing Py2 module names:
+ >>> from future.standard_library import install_aliases
+ >>> install_aliases()
+
+ >>> from collections import Counter, OrderedDict # backported to Py2.6
+ >>> from collections import UserDict, UserList, UserString
+ >>> import urllib.request
+ >>> from itertools import filterfalse, zip_longest
+ >>> from subprocess import getoutput, getstatusoutput
+
+
+Automatic conversion
+--------------------
+
+An included script called `futurize
+<https://python-future.org/automatic_conversion.html>`_ aids in converting
+code (from either Python 2 or Python 3) to code compatible with both
+platforms. It is similar to ``python-modernize`` but goes further in
+providing Python 3 compatibility through the use of the backported types
+and builtin functions in ``future``.
+
+
+Documentation
+-------------
+
+See: https://python-future.org
+
+
+Credits
+-------
+
+:Author: Ed Schofield, Jordan M. Adler, et al
+:Sponsor: Python Charmers: https://pythoncharmers.com
+:Others: See docs/credits.rst or https://python-future.org/credits.html
+
+
+Licensing
+---------
+Copyright 2013-2024 Python Charmers, Australia.
+The software is distributed under an MIT licence. See LICENSE.txt.
+
+"""
+
+__title__ = 'future'
+__author__ = 'Ed Schofield'
+__license__ = 'MIT'
+__copyright__ = 'Copyright 2013-2024 Python Charmers (https://pythoncharmers.com)'
+__ver_major__ = 1
+__ver_minor__ = 0
+__ver_patch__ = 0
+__ver_sub__ = ''
+__version__ = "%d.%d.%d%s" % (__ver_major__, __ver_minor__,
+ __ver_patch__, __ver_sub__)
diff --git a/src/future/backports/__init__.py b/src/future/backports/__init__.py
new file mode 100644
index 00000000..c71e0653
--- /dev/null
+++ b/src/future/backports/__init__.py
@@ -0,0 +1,26 @@
+"""
+future.backports package
+"""
+
+from __future__ import absolute_import
+
+import sys
+
+__future_module__ = True
+from future.standard_library import import_top_level_modules
+
+
+if sys.version_info[0] >= 3:
+ import_top_level_modules()
+
+
+from .misc import (ceil,
+ OrderedDict,
+ Counter,
+ ChainMap,
+ check_output,
+ count,
+ recursive_repr,
+ _count_elements,
+ cmp_to_key
+ )
diff --git a/future/standard_library/_markupbase.py b/src/future/backports/_markupbase.py
similarity index 100%
rename from future/standard_library/_markupbase.py
rename to src/future/backports/_markupbase.py
diff --git a/src/future/backports/datetime.py b/src/future/backports/datetime.py
new file mode 100644
index 00000000..8cd62ddf
--- /dev/null
+++ b/src/future/backports/datetime.py
@@ -0,0 +1,2152 @@
+"""Concrete date/time and related types.
+
+See http://www.iana.org/time-zones/repository/tz-link.html for
+time zone and DST data sources.
+"""
+from __future__ import division
+from __future__ import unicode_literals
+from __future__ import print_function
+from __future__ import absolute_import
+from future.builtins import str
+from future.builtins import bytes
+from future.builtins import map
+from future.builtins import round
+from future.builtins import int
+from future.builtins import object
+from future.utils import native_str, PY2
+
+import time as _time
+import math as _math
+
+def _cmp(x, y):
+ return 0 if x == y else 1 if x > y else -1
+
+MINYEAR = 1
+MAXYEAR = 9999
+_MAXORDINAL = 3652059 # date.max.toordinal()
+
+# Utility functions, adapted from Python's Demo/classes/Dates.py, which
+# also assumes the current Gregorian calendar indefinitely extended in
+# both directions. Difference: Dates.py calls January 1 of year 0 day
+# number 1. The code here calls January 1 of year 1 day number 1. This is
+# to match the definition of the "proleptic Gregorian" calendar in Dershowitz
+# and Reingold's "Calendrical Calculations", where it's the base calendar
+# for all computations. See the book for algorithms for converting between
+# proleptic Gregorian ordinals and many other calendar systems.
+
+_DAYS_IN_MONTH = [None, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
+
+_DAYS_BEFORE_MONTH = [None]
+dbm = 0
+for dim in _DAYS_IN_MONTH[1:]:
+ _DAYS_BEFORE_MONTH.append(dbm)
+ dbm += dim
+del dbm, dim
+
+def _is_leap(year):
+ "year -> 1 if leap year, else 0."
+ return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
+
+def _days_before_year(year):
+ "year -> number of days before January 1st of year."
+ y = year - 1
+ return y*365 + y//4 - y//100 + y//400
+
+def _days_in_month(year, month):
+ "year, month -> number of days in that month in that year."
+ assert 1 <= month <= 12, month
+ if month == 2 and _is_leap(year):
+ return 29
+ return _DAYS_IN_MONTH[month]
+
+def _days_before_month(year, month):
+ "year, month -> number of days in year preceding first day of month."
+ assert 1 <= month <= 12, 'month must be in 1..12'
+ return _DAYS_BEFORE_MONTH[month] + (month > 2 and _is_leap(year))
+
+def _ymd2ord(year, month, day):
+ "year, month, day -> ordinal, considering 01-Jan-0001 as day 1."
+ assert 1 <= month <= 12, 'month must be in 1..12'
+ dim = _days_in_month(year, month)
+ assert 1 <= day <= dim, ('day must be in 1..%d' % dim)
+ return (_days_before_year(year) +
+ _days_before_month(year, month) +
+ day)
+
+_DI400Y = _days_before_year(401) # number of days in 400 years
+_DI100Y = _days_before_year(101) # " " " " 100 "
+_DI4Y = _days_before_year(5) # " " " " 4 "
+
+# A 4-year cycle has an extra leap day over what we'd get from pasting
+# together 4 single years.
+assert _DI4Y == 4 * 365 + 1
+
+# Similarly, a 400-year cycle has an extra leap day over what we'd get from
+# pasting together 4 100-year cycles.
+assert _DI400Y == 4 * _DI100Y + 1
+
+# OTOH, a 100-year cycle has one fewer leap day than we'd get from
+# pasting together 25 4-year cycles.
+assert _DI100Y == 25 * _DI4Y - 1
+
+def _ord2ymd(n):
+ "ordinal -> (year, month, day), considering 01-Jan-0001 as day 1."
+
+ # n is a 1-based index, starting at 1-Jan-1. The pattern of leap years
+ # repeats exactly every 400 years. The basic strategy is to find the
+ # closest 400-year boundary at or before n, then work with the offset
+ # from that boundary to n. Life is much clearer if we subtract 1 from
+ # n first -- then the values of n at 400-year boundaries are exactly
+ # those divisible by _DI400Y:
+ #
+ # D M Y n n-1
+ # -- --- ---- ---------- ----------------
+ # 31 Dec -400 -_DI400Y -_DI400Y -1
+ # 1 Jan -399 -_DI400Y +1 -_DI400Y 400-year boundary
+ # ...
+ # 30 Dec 000 -1 -2
+ # 31 Dec 000 0 -1
+ # 1 Jan 001 1 0 400-year boundary
+ # 2 Jan 001 2 1
+ # 3 Jan 001 3 2
+ # ...
+ # 31 Dec 400 _DI400Y _DI400Y -1
+ # 1 Jan 401 _DI400Y +1 _DI400Y 400-year boundary
+ n -= 1
+ n400, n = divmod(n, _DI400Y)
+ year = n400 * 400 + 1 # ..., -399, 1, 401, ...
+
+ # Now n is the (non-negative) offset, in days, from January 1 of year, to
+ # the desired date. Now compute how many 100-year cycles precede n.
+ # Note that it's possible for n100 to equal 4! In that case 4 full
+ # 100-year cycles precede the desired day, which implies the desired
+ # day is December 31 at the end of a 400-year cycle.
+ n100, n = divmod(n, _DI100Y)
+
+ # Now compute how many 4-year cycles precede it.
+ n4, n = divmod(n, _DI4Y)
+
+ # And now how many single years. Again n1 can be 4, and again meaning
+ # that the desired day is December 31 at the end of the 4-year cycle.
+ n1, n = divmod(n, 365)
+
+ year += n100 * 100 + n4 * 4 + n1
+ if n1 == 4 or n100 == 4:
+ assert n == 0
+ return year-1, 12, 31
+
+ # Now the year is correct, and n is the offset from January 1. We find
+ # the month via an estimate that's either exact or one too large.
+ leapyear = n1 == 3 and (n4 != 24 or n100 == 3)
+ assert leapyear == _is_leap(year)
+ month = (n + 50) >> 5
+ preceding = _DAYS_BEFORE_MONTH[month] + (month > 2 and leapyear)
+ if preceding > n: # estimate is too large
+ month -= 1
+ preceding -= _DAYS_IN_MONTH[month] + (month == 2 and leapyear)
+ n -= preceding
+ assert 0 <= n < _days_in_month(year, month)
+
+ # Now the year and month are correct, and n is the offset from the
+ # start of that month: we're done!
+ return year, month, n+1
+
+# Month and day names. For localized versions, see the calendar module.
+_MONTHNAMES = [None, "Jan", "Feb", "Mar", "Apr", "May", "Jun",
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
+_DAYNAMES = [None, "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
+
+
+def _build_struct_time(y, m, d, hh, mm, ss, dstflag):
+ wday = (_ymd2ord(y, m, d) + 6) % 7
+ dnum = _days_before_month(y, m) + d
+ return _time.struct_time((y, m, d, hh, mm, ss, wday, dnum, dstflag))
+
+def _format_time(hh, mm, ss, us):
+ # Skip trailing microseconds when us==0.
+ result = "%02d:%02d:%02d" % (hh, mm, ss)
+ if us:
+ result += ".%06d" % us
+ return result
+
+# Correctly substitute for %z and %Z escapes in strftime formats.
+def _wrap_strftime(object, format, timetuple):
+ # Don't call utcoffset() or tzname() unless actually needed.
+ freplace = None # the string to use for %f
+ zreplace = None # the string to use for %z
+ Zreplace = None # the string to use for %Z
+
+ # Scan format for %z and %Z escapes, replacing as needed.
+ newformat = []
+ push = newformat.append
+ i, n = 0, len(format)
+ while i < n:
+ ch = format[i]
+ i += 1
+ if ch == '%':
+ if i < n:
+ ch = format[i]
+ i += 1
+ if ch == 'f':
+ if freplace is None:
+ freplace = '%06d' % getattr(object,
+ 'microsecond', 0)
+ newformat.append(freplace)
+ elif ch == 'z':
+ if zreplace is None:
+ zreplace = ""
+ if hasattr(object, "utcoffset"):
+ offset = object.utcoffset()
+ if offset is not None:
+ sign = '+'
+ if offset.days < 0:
+ offset = -offset
+ sign = '-'
+ h, m = divmod(offset, timedelta(hours=1))
+ assert not m % timedelta(minutes=1), "whole minute"
+ m //= timedelta(minutes=1)
+ zreplace = '%c%02d%02d' % (sign, h, m)
+ assert '%' not in zreplace
+ newformat.append(zreplace)
+ elif ch == 'Z':
+ if Zreplace is None:
+ Zreplace = ""
+ if hasattr(object, "tzname"):
+ s = object.tzname()
+ if s is not None:
+ # strftime is going to have at this: escape %
+ Zreplace = s.replace('%', '%%')
+ newformat.append(Zreplace)
+ else:
+ push('%')
+ push(ch)
+ else:
+ push('%')
+ else:
+ push(ch)
+ newformat = "".join(newformat)
+ return _time.strftime(newformat, timetuple)
+
+def _call_tzinfo_method(tzinfo, methname, tzinfoarg):
+ if tzinfo is None:
+ return None
+ return getattr(tzinfo, methname)(tzinfoarg)
+
+# Just raise TypeError if the arg isn't None or a string.
+def _check_tzname(name):
+ if name is not None and not isinstance(name, str):
+ raise TypeError("tzinfo.tzname() must return None or string, "
+ "not '%s'" % type(name))
+
+# name is the offset-producing method, "utcoffset" or "dst".
+# offset is what it returned.
+# If offset isn't None or timedelta, raises TypeError.
+# If offset is None, returns None.
+# Else offset is checked for being in range, and a whole # of minutes.
+# If it is, its integer value is returned. Else ValueError is raised.
+def _check_utc_offset(name, offset):
+ assert name in ("utcoffset", "dst")
+ if offset is None:
+ return
+ if not isinstance(offset, timedelta):
+ raise TypeError("tzinfo.%s() must return None "
+ "or timedelta, not '%s'" % (name, type(offset)))
+ if offset % timedelta(minutes=1) or offset.microseconds:
+ raise ValueError("tzinfo.%s() must return a whole number "
+ "of minutes, got %s" % (name, offset))
+ if not -timedelta(1) < offset < timedelta(1):
+ raise ValueError("%s()=%s, must be must be strictly between"
+ " -timedelta(hours=24) and timedelta(hours=24)"
+ % (name, offset))
+
+def _check_date_fields(year, month, day):
+ if not isinstance(year, int):
+ raise TypeError('int expected')
+ if not MINYEAR <= year <= MAXYEAR:
+ raise ValueError('year must be in %d..%d' % (MINYEAR, MAXYEAR), year)
+ if not 1 <= month <= 12:
+ raise ValueError('month must be in 1..12', month)
+ dim = _days_in_month(year, month)
+ if not 1 <= day <= dim:
+ raise ValueError('day must be in 1..%d' % dim, day)
+
+def _check_time_fields(hour, minute, second, microsecond):
+ if not isinstance(hour, int):
+ raise TypeError('int expected')
+ if not 0 <= hour <= 23:
+ raise ValueError('hour must be in 0..23', hour)
+ if not 0 <= minute <= 59:
+ raise ValueError('minute must be in 0..59', minute)
+ if not 0 <= second <= 59:
+ raise ValueError('second must be in 0..59', second)
+ if not 0 <= microsecond <= 999999:
+ raise ValueError('microsecond must be in 0..999999', microsecond)
+
+def _check_tzinfo_arg(tz):
+ if tz is not None and not isinstance(tz, tzinfo):
+ raise TypeError("tzinfo argument must be None or of a tzinfo subclass")
+
+def _cmperror(x, y):
+ raise TypeError("can't compare '%s' to '%s'" % (
+ type(x).__name__, type(y).__name__))
+
+class timedelta(object):
+ """Represent the difference between two datetime objects.
+
+ Supported operators:
+
+ - add, subtract timedelta
+ - unary plus, minus, abs
+ - compare to timedelta
+ - multiply, divide by int
+
+ In addition, datetime supports subtraction of two datetime objects
+ returning a timedelta, and addition or subtraction of a datetime
+ and a timedelta giving a datetime.
+
+ Representation: (days, seconds, microseconds). Why? Because I
+ felt like it.
+ """
+ __slots__ = '_days', '_seconds', '_microseconds'
+
+ def __new__(cls, days=0, seconds=0, microseconds=0,
+ milliseconds=0, minutes=0, hours=0, weeks=0):
+ # Doing this efficiently and accurately in C is going to be difficult
+ # and error-prone, due to ubiquitous overflow possibilities, and that
+ # C double doesn't have enough bits of precision to represent
+ # microseconds over 10K years faithfully. The code here tries to make
+ # explicit where go-fast assumptions can be relied on, in order to
+ # guide the C implementation; it's way more convoluted than speed-
+ # ignoring auto-overflow-to-long idiomatic Python could be.
+
+ # XXX Check that all inputs are ints or floats.
+
+ # Final values, all integer.
+ # s and us fit in 32-bit signed ints; d isn't bounded.
+ d = s = us = 0
+
+ # Normalize everything to days, seconds, microseconds.
+ days += weeks*7
+ seconds += minutes*60 + hours*3600
+ microseconds += milliseconds*1000
+
+ # Get rid of all fractions, and normalize s and us.
+ # Take a deep breath <wink>.
+ if isinstance(days, float):
+ dayfrac, days = _math.modf(days)
+ daysecondsfrac, daysecondswhole = _math.modf(dayfrac * (24.*3600.))
+ assert daysecondswhole == int(daysecondswhole) # can't overflow
+ s = int(daysecondswhole)
+ assert days == int(days)
+ d = int(days)
+ else:
+ daysecondsfrac = 0.0
+ d = days
+ assert isinstance(daysecondsfrac, float)
+ assert abs(daysecondsfrac) <= 1.0
+ assert isinstance(d, int)
+ assert abs(s) <= 24 * 3600
+ # days isn't referenced again before redefinition
+
+ if isinstance(seconds, float):
+ secondsfrac, seconds = _math.modf(seconds)
+ assert seconds == int(seconds)
+ seconds = int(seconds)
+ secondsfrac += daysecondsfrac
+ assert abs(secondsfrac) <= 2.0
+ else:
+ secondsfrac = daysecondsfrac
+ # daysecondsfrac isn't referenced again
+ assert isinstance(secondsfrac, float)
+ assert abs(secondsfrac) <= 2.0
+
+ assert isinstance(seconds, int)
+ days, seconds = divmod(seconds, 24*3600)
+ d += days
+ s += int(seconds) # can't overflow
+ assert isinstance(s, int)
+ assert abs(s) <= 2 * 24 * 3600
+ # seconds isn't referenced again before redefinition
+
+ usdouble = secondsfrac * 1e6
+ assert abs(usdouble) < 2.1e6 # exact value not critical
+ # secondsfrac isn't referenced again
+
+ if isinstance(microseconds, float):
+ microseconds += usdouble
+ microseconds = round(microseconds, 0)
+ seconds, microseconds = divmod(microseconds, 1e6)
+ assert microseconds == int(microseconds)
+ assert seconds == int(seconds)
+ days, seconds = divmod(seconds, 24.*3600.)
+ assert days == int(days)
+ assert seconds == int(seconds)
+ d += int(days)
+ s += int(seconds) # can't overflow
+ assert isinstance(s, int)
+ assert abs(s) <= 3 * 24 * 3600
+ else:
+ seconds, microseconds = divmod(microseconds, 1000000)
+ days, seconds = divmod(seconds, 24*3600)
+ d += days
+ s += int(seconds) # can't overflow
+ assert isinstance(s, int)
+ assert abs(s) <= 3 * 24 * 3600
+ microseconds = float(microseconds)
+ microseconds += usdouble
+ microseconds = round(microseconds, 0)
+ assert abs(s) <= 3 * 24 * 3600
+ assert abs(microseconds) < 3.1e6
+
+ # Just a little bit of carrying possible for microseconds and seconds.
+ assert isinstance(microseconds, float)
+ assert int(microseconds) == microseconds
+ us = int(microseconds)
+ seconds, us = divmod(us, 1000000)
+ s += seconds # cant't overflow
+ assert isinstance(s, int)
+ days, s = divmod(s, 24*3600)
+ d += days
+
+ assert isinstance(d, int)
+ assert isinstance(s, int) and 0 <= s < 24*3600
+ assert isinstance(us, int) and 0 <= us < 1000000
+
+ self = object.__new__(cls)
+
+ self._days = d
+ self._seconds = s
+ self._microseconds = us
+ if abs(d) > 999999999:
+ raise OverflowError("timedelta # of days is too large: %d" % d)
+
+ return self
+
+ def __repr__(self):
+ if self._microseconds:
+ return "%s(%d, %d, %d)" % ('datetime.' + self.__class__.__name__,
+ self._days,
+ self._seconds,
+ self._microseconds)
+ if self._seconds:
+ return "%s(%d, %d)" % ('datetime.' + self.__class__.__name__,
+ self._days,
+ self._seconds)
+ return "%s(%d)" % ('datetime.' + self.__class__.__name__, self._days)
+
+ def __str__(self):
+ mm, ss = divmod(self._seconds, 60)
+ hh, mm = divmod(mm, 60)
+ s = "%d:%02d:%02d" % (hh, mm, ss)
+ if self._days:
+ def plural(n):
+ return n, abs(n) != 1 and "s" or ""
+ s = ("%d day%s, " % plural(self._days)) + s
+ if self._microseconds:
+ s = s + ".%06d" % self._microseconds
+ return s
+
+ def total_seconds(self):
+ """Total seconds in the duration."""
+ return ((self.days * 86400 + self.seconds)*10**6 +
+ self.microseconds) / 10**6
+
+ # Read-only field accessors
+ @property
+ def days(self):
+ """days"""
+ return self._days
+
+ @property
+ def seconds(self):
+ """seconds"""
+ return self._seconds
+
+ @property
+ def microseconds(self):
+ """microseconds"""
+ return self._microseconds
+
+ def __add__(self, other):
+ if isinstance(other, timedelta):
+ # for CPython compatibility, we cannot use
+ # our __class__ here, but need a real timedelta
+ return timedelta(self._days + other._days,
+ self._seconds + other._seconds,
+ self._microseconds + other._microseconds)
+ return NotImplemented
+
+ __radd__ = __add__
+
+ def __sub__(self, other):
+ if isinstance(other, timedelta):
+ # for CPython compatibility, we cannot use
+ # our __class__ here, but need a real timedelta
+ return timedelta(self._days - other._days,
+ self._seconds - other._seconds,
+ self._microseconds - other._microseconds)
+ return NotImplemented
+
+ def __rsub__(self, other):
+ if isinstance(other, timedelta):
+ return -self + other
+ return NotImplemented
+
+ def __neg__(self):
+ # for CPython compatibility, we cannot use
+ # our __class__ here, but need a real timedelta
+ return timedelta(-self._days,
+ -self._seconds,
+ -self._microseconds)
+
+ def __pos__(self):
+ return self
+
+ def __abs__(self):
+ if self._days < 0:
+ return -self
+ else:
+ return self
+
+ def __mul__(self, other):
+ if isinstance(other, int):
+ # for CPython compatibility, we cannot use
+ # our __class__ here, but need a real timedelta
+ return timedelta(self._days * other,
+ self._seconds * other,
+ self._microseconds * other)
+ if isinstance(other, float):
+ a, b = other.as_integer_ratio()
+ return self * a / b
+ return NotImplemented
+
+ __rmul__ = __mul__
+
+ def _to_microseconds(self):
+ return ((self._days * (24*3600) + self._seconds) * 1000000 +
+ self._microseconds)
+
+ def __floordiv__(self, other):
+ if not isinstance(other, (int, timedelta)):
+ return NotImplemented
+ usec = self._to_microseconds()
+ if isinstance(other, timedelta):
+ return usec // other._to_microseconds()
+ if isinstance(other, int):
+ return timedelta(0, 0, usec // other)
+
+ def __truediv__(self, other):
+ if not isinstance(other, (int, float, timedelta)):
+ return NotImplemented
+ usec = self._to_microseconds()
+ if isinstance(other, timedelta):
+ return usec / other._to_microseconds()
+ if isinstance(other, int):
+ return timedelta(0, 0, usec / other)
+ if isinstance(other, float):
+ a, b = other.as_integer_ratio()
+ return timedelta(0, 0, b * usec / a)
+
+ def __mod__(self, other):
+ if isinstance(other, timedelta):
+ r = self._to_microseconds() % other._to_microseconds()
+ return timedelta(0, 0, r)
+ return NotImplemented
+
+ def __divmod__(self, other):
+ if isinstance(other, timedelta):
+ q, r = divmod(self._to_microseconds(),
+ other._to_microseconds())
+ return q, timedelta(0, 0, r)
+ return NotImplemented
+
+ # Comparisons of timedelta objects with other.
+
+ def __eq__(self, other):
+ if isinstance(other, timedelta):
+ return self._cmp(other) == 0
+ else:
+ return False
+
+ def __ne__(self, other):
+ if isinstance(other, timedelta):
+ return self._cmp(other) != 0
+ else:
+ return True
+
+ def __le__(self, other):
+ if isinstance(other, timedelta):
+ return self._cmp(other) <= 0
+ else:
+ _cmperror(self, other)
+
+ def __lt__(self, other):
+ if isinstance(other, timedelta):
+ return self._cmp(other) < 0
+ else:
+ _cmperror(self, other)
+
+ def __ge__(self, other):
+ if isinstance(other, timedelta):
+ return self._cmp(other) >= 0
+ else:
+ _cmperror(self, other)
+
+ def __gt__(self, other):
+ if isinstance(other, timedelta):
+ return self._cmp(other) > 0
+ else:
+ _cmperror(self, other)
+
+ def _cmp(self, other):
+ assert isinstance(other, timedelta)
+ return _cmp(self._getstate(), other._getstate())
+
+ def __hash__(self):
+ return hash(self._getstate())
+
+ def __bool__(self):
+ return (self._days != 0 or
+ self._seconds != 0 or
+ self._microseconds != 0)
+
+ # Pickle support.
+
+ def _getstate(self):
+ return (self._days, self._seconds, self._microseconds)
+
+ def __reduce__(self):
+ return (self.__class__, self._getstate())
+
+timedelta.min = timedelta(-999999999)
+timedelta.max = timedelta(days=999999999, hours=23, minutes=59, seconds=59,
+ microseconds=999999)
+timedelta.resolution = timedelta(microseconds=1)
+
+class date(object):
+ """Concrete date type.
+
+ Constructors:
+
+ __new__()
+ fromtimestamp()
+ today()
+ fromordinal()
+
+ Operators:
+
+ __repr__, __str__
+ __cmp__, __hash__
+ __add__, __radd__, __sub__ (add/radd only with timedelta arg)
+
+ Methods:
+
+ timetuple()
+ toordinal()
+ weekday()
+ isoweekday(), isocalendar(), isoformat()
+ ctime()
+ strftime()
+
+ Properties (readonly):
+ year, month, day
+ """
+ __slots__ = '_year', '_month', '_day'
+
+ def __new__(cls, year, month=None, day=None):
+ """Constructor.
+
+ Arguments:
+
+ year, month, day (required, base 1)
+ """
+ if (isinstance(year, bytes) and len(year) == 4 and
+ 1 <= year[2] <= 12 and month is None): # Month is sane
+ # Pickle support
+ self = object.__new__(cls)
+ self.__setstate(year)
+ return self
+ _check_date_fields(year, month, day)
+ self = object.__new__(cls)
+ self._year = year
+ self._month = month
+ self._day = day
+ return self
+
+ # Additional constructors
+
+ @classmethod
+ def fromtimestamp(cls, t):
+ "Construct a date from a POSIX timestamp (like time.time())."
+ y, m, d, hh, mm, ss, weekday, jday, dst = _time.localtime(t)
+ return cls(y, m, d)
+
+ @classmethod
+ def today(cls):
+ "Construct a date from time.time()."
+ t = _time.time()
+ return cls.fromtimestamp(t)
+
+ @classmethod
+ def fromordinal(cls, n):
+ """Construct a date from a proleptic Gregorian ordinal.
+
+ January 1 of year 1 is day 1. Only the year, month and day are
+ non-zero in the result.
+ """
+ y, m, d = _ord2ymd(n)
+ return cls(y, m, d)
+
+ # Conversions to string
+
+ def __repr__(self):
+ """Convert to formal string, for repr().
+
+ >>> dt = datetime(2010, 1, 1)
+ >>> repr(dt)
+ 'datetime.datetime(2010, 1, 1, 0, 0)'
+
+ >>> dt = datetime(2010, 1, 1, tzinfo=timezone.utc)
+ >>> repr(dt)
+ 'datetime.datetime(2010, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)'
+ """
+ return "%s(%d, %d, %d)" % ('datetime.' + self.__class__.__name__,
+ self._year,
+ self._month,
+ self._day)
+ # XXX These shouldn't depend on time.localtime(), because that
+ # clips the usable dates to [1970 .. 2038). At least ctime() is
+ # easily done without using strftime() -- that's better too because
+ # strftime("%c", ...) is locale specific.
+
+
+ def ctime(self):
+ "Return ctime() style string."
+ weekday = self.toordinal() % 7 or 7
+ return "%s %s %2d 00:00:00 %04d" % (
+ _DAYNAMES[weekday],
+ _MONTHNAMES[self._month],
+ self._day, self._year)
+
+ def strftime(self, fmt):
+ "Format using strftime()."
+ return _wrap_strftime(self, fmt, self.timetuple())
+
+ def __format__(self, fmt):
+ if len(fmt) != 0:
+ return self.strftime(fmt)
+ return str(self)
+
+ def isoformat(self):
+ """Return the date formatted according to ISO.
+
+ This is 'YYYY-MM-DD'.
+
+ References:
+ - http://www.w3.org/TR/NOTE-datetime
+ - http://www.cl.cam.ac.uk/~mgk25/iso-time.html
+ """
+ return "%04d-%02d-%02d" % (self._year, self._month, self._day)
+
+ __str__ = isoformat
+
+ # Read-only field accessors
+ @property
+ def year(self):
+ """year (1-9999)"""
+ return self._year
+
+ @property
+ def month(self):
+ """month (1-12)"""
+ return self._month
+
+ @property
+ def day(self):
+ """day (1-31)"""
+ return self._day
+
+ # Standard conversions, __cmp__, __hash__ (and helpers)
+
+ def timetuple(self):
+ "Return local time tuple compatible with time.localtime()."
+ return _build_struct_time(self._year, self._month, self._day,
+ 0, 0, 0, -1)
+
+ def toordinal(self):
+ """Return proleptic Gregorian ordinal for the year, month and day.
+
+ January 1 of year 1 is day 1. Only the year, month and day values
+ contribute to the result.
+ """
+ return _ymd2ord(self._year, self._month, self._day)
+
+ def replace(self, year=None, month=None, day=None):
+ """Return a new date with new values for the specified fields."""
+ if year is None:
+ year = self._year
+ if month is None:
+ month = self._month
+ if day is None:
+ day = self._day
+ _check_date_fields(year, month, day)
+ return date(year, month, day)
+
+ # Comparisons of date objects with other.
+
+ def __eq__(self, other):
+ if isinstance(other, date):
+ return self._cmp(other) == 0
+ return NotImplemented
+
+ def __ne__(self, other):
+ if isinstance(other, date):
+ return self._cmp(other) != 0
+ return NotImplemented
+
+ def __le__(self, other):
+ if isinstance(other, date):
+ return self._cmp(other) <= 0
+ return NotImplemented
+
+ def __lt__(self, other):
+ if isinstance(other, date):
+ return self._cmp(other) < 0
+ return NotImplemented
+
+ def __ge__(self, other):
+ if isinstance(other, date):
+ return self._cmp(other) >= 0
+ return NotImplemented
+
+ def __gt__(self, other):
+ if isinstance(other, date):
+ return self._cmp(other) > 0
+ return NotImplemented
+
+ def _cmp(self, other):
+ assert isinstance(other, date)
+ y, m, d = self._year, self._month, self._day
+ y2, m2, d2 = other._year, other._month, other._day
+ return _cmp((y, m, d), (y2, m2, d2))
+
+ def __hash__(self):
+ "Hash."
+ return hash(self._getstate())
+
+ # Computations
+
+ def __add__(self, other):
+ "Add a date to a timedelta."
+ if isinstance(other, timedelta):
+ o = self.toordinal() + other.days
+ if 0 < o <= _MAXORDINAL:
+ return date.fromordinal(o)
+ raise OverflowError("result out of range")
+ return NotImplemented
+
+ __radd__ = __add__
+
+ def __sub__(self, other):
+ """Subtract two dates, or a date and a timedelta."""
+ if isinstance(other, timedelta):
+ return self + timedelta(-other.days)
+ if isinstance(other, date):
+ days1 = self.toordinal()
+ days2 = other.toordinal()
+ return timedelta(days1 - days2)
+ return NotImplemented
+
+ def weekday(self):
+ "Return day of the week, where Monday == 0 ... Sunday == 6."
+ return (self.toordinal() + 6) % 7
+
+ # Day-of-the-week and week-of-the-year, according to ISO
+
+ def isoweekday(self):
+ "Return day of the week, where Monday == 1 ... Sunday == 7."
+ # 1-Jan-0001 is a Monday
+ return self.toordinal() % 7 or 7
+
+ def isocalendar(self):
+ """Return a 3-tuple containing ISO year, week number, and weekday.
+
+ The first ISO week of the year is the (Mon-Sun) week
+ containing the year's first Thursday; everything else derives
+ from that.
+
+ The first week is 1; Monday is 1 ... Sunday is 7.
+
+ ISO calendar algorithm taken from
+ http://www.phys.uu.nl/~vgent/calendar/isocalendar.htm
+ """
+ year = self._year
+ week1monday = _isoweek1monday(year)
+ today = _ymd2ord(self._year, self._month, self._day)
+ # Internally, week and day have origin 0
+ week, day = divmod(today - week1monday, 7)
+ if week < 0:
+ year -= 1
+ week1monday = _isoweek1monday(year)
+ week, day = divmod(today - week1monday, 7)
+ elif week >= 52:
+ if today >= _isoweek1monday(year+1):
+ year += 1
+ week = 0
+ return year, week+1, day+1
+
+ # Pickle support.
+
+ def _getstate(self):
+ yhi, ylo = divmod(self._year, 256)
+ return bytes([yhi, ylo, self._month, self._day]),
+
+ def __setstate(self, string):
+ if len(string) != 4 or not (1 <= string[2] <= 12):
+ raise TypeError("not enough arguments")
+ yhi, ylo, self._month, self._day = string
+ self._year = yhi * 256 + ylo
+
+ def __reduce__(self):
+ return (self.__class__, self._getstate())
+
+_date_class = date # so functions w/ args named "date" can get at the class
+
+date.min = date(1, 1, 1)
+date.max = date(9999, 12, 31)
+date.resolution = timedelta(days=1)
+
+class tzinfo(object):
+ """Abstract base class for time zone info classes.
+
+ Subclasses must override the name(), utcoffset() and dst() methods.
+ """
+ __slots__ = ()
+ def tzname(self, dt):
+ "datetime -> string name of time zone."
+ raise NotImplementedError("tzinfo subclass must override tzname()")
+
+ def utcoffset(self, dt):
+ "datetime -> minutes east of UTC (negative for west of UTC)"
+ raise NotImplementedError("tzinfo subclass must override utcoffset()")
+
+ def dst(self, dt):
+ """datetime -> DST offset in minutes east of UTC.
+
+ Return 0 if DST not in effect. utcoffset() must include the DST
+ offset.
+ """
+ raise NotImplementedError("tzinfo subclass must override dst()")
+
+ def fromutc(self, dt):
+ "datetime in UTC -> datetime in local time."
+
+ if not isinstance(dt, datetime):
+ raise TypeError("fromutc() requires a datetime argument")
+ if dt.tzinfo is not self:
+ raise ValueError("dt.tzinfo is not self")
+
+ dtoff = dt.utcoffset()
+ if dtoff is None:
+ raise ValueError("fromutc() requires a non-None utcoffset() "
+ "result")
+
+ # See the long comment block at the end of this file for an
+ # explanation of this algorithm.
+ dtdst = dt.dst()
+ if dtdst is None:
+ raise ValueError("fromutc() requires a non-None dst() result")
+ delta = dtoff - dtdst
+ if delta:
+ dt += delta
+ dtdst = dt.dst()
+ if dtdst is None:
+ raise ValueError("fromutc(): dt.dst gave inconsistent "
+ "results; cannot convert")
+ return dt + dtdst
+
+ # Pickle support.
+
+ def __reduce__(self):
+ getinitargs = getattr(self, "__getinitargs__", None)
+ if getinitargs:
+ args = getinitargs()
+ else:
+ args = ()
+ getstate = getattr(self, "__getstate__", None)
+ if getstate:
+ state = getstate()
+ else:
+ state = getattr(self, "__dict__", None) or None
+ if state is None:
+ return (self.__class__, args)
+ else:
+ return (self.__class__, args, state)
+
+_tzinfo_class = tzinfo
+
+class time(object):
+ """Time with time zone.
+
+ Constructors:
+
+ __new__()
+
+ Operators:
+
+ __repr__, __str__
+ __cmp__, __hash__
+
+ Methods:
+
+ strftime()
+ isoformat()
+ utcoffset()
+ tzname()
+ dst()
+
+ Properties (readonly):
+ hour, minute, second, microsecond, tzinfo
+ """
+
+ def __new__(cls, hour=0, minute=0, second=0, microsecond=0, tzinfo=None):
+ """Constructor.
+
+ Arguments:
+
+ hour, minute (required)
+ second, microsecond (default to zero)
+ tzinfo (default to None)
+ """
+ self = object.__new__(cls)
+ if isinstance(hour, bytes) and len(hour) == 6:
+ # Pickle support
+ self.__setstate(hour, minute or None)
+ return self
+ _check_tzinfo_arg(tzinfo)
+ _check_time_fields(hour, minute, second, microsecond)
+ self._hour = hour
+ self._minute = minute
+ self._second = second
+ self._microsecond = microsecond
+ self._tzinfo = tzinfo
+ return self
+
+ # Read-only field accessors
+ @property
+ def hour(self):
+ """hour (0-23)"""
+ return self._hour
+
+ @property
+ def minute(self):
+ """minute (0-59)"""
+ return self._minute
+
+ @property
+ def second(self):
+ """second (0-59)"""
+ return self._second
+
+ @property
+ def microsecond(self):
+ """microsecond (0-999999)"""
+ return self._microsecond
+
+ @property
+ def tzinfo(self):
+ """timezone info object"""
+ return self._tzinfo
+
+ # Standard conversions, __hash__ (and helpers)
+
+ # Comparisons of time objects with other.
+
+ def __eq__(self, other):
+ if isinstance(other, time):
+ return self._cmp(other, allow_mixed=True) == 0
+ else:
+ return False
+
+ def __ne__(self, other):
+ if isinstance(other, time):
+ return self._cmp(other, allow_mixed=True) != 0
+ else:
+ return True
+
+ def __le__(self, other):
+ if isinstance(other, time):
+ return self._cmp(other) <= 0
+ else:
+ _cmperror(self, other)
+
+ def __lt__(self, other):
+ if isinstance(other, time):
+ return self._cmp(other) < 0
+ else:
+ _cmperror(self, other)
+
+ def __ge__(self, other):
+ if isinstance(other, time):
+ return self._cmp(other) >= 0
+ else:
+ _cmperror(self, other)
+
+ def __gt__(self, other):
+ if isinstance(other, time):
+ return self._cmp(other) > 0
+ else:
+ _cmperror(self, other)
+
+ def _cmp(self, other, allow_mixed=False):
+ assert isinstance(other, time)
+ mytz = self._tzinfo
+ ottz = other._tzinfo
+ myoff = otoff = None
+
+ if mytz is ottz:
+ base_compare = True
+ else:
+ myoff = self.utcoffset()
+ otoff = other.utcoffset()
+ base_compare = myoff == otoff
+
+ if base_compare:
+ return _cmp((self._hour, self._minute, self._second,
+ self._microsecond),
+ (other._hour, other._minute, other._second,
+ other._microsecond))
+ if myoff is None or otoff is None:
+ if allow_mixed:
+ return 2 # arbitrary non-zero value
+ else:
+ raise TypeError("cannot compare naive and aware times")
+ myhhmm = self._hour * 60 + self._minute - myoff//timedelta(minutes=1)
+ othhmm = other._hour * 60 + other._minute - otoff//timedelta(minutes=1)
+ return _cmp((myhhmm, self._second, self._microsecond),
+ (othhmm, other._second, other._microsecond))
+
+ def __hash__(self):
+ """Hash."""
+ tzoff = self.utcoffset()
+ if not tzoff: # zero or None
+ return hash(self._getstate()[0])
+ h, m = divmod(timedelta(hours=self.hour, minutes=self.minute) - tzoff,
+ timedelta(hours=1))
+ assert not m % timedelta(minutes=1), "whole minute"
+ m //= timedelta(minutes=1)
+ if 0 <= h < 24:
+ return hash(time(h, m, self.second, self.microsecond))
+ return hash((h, m, self.second, self.microsecond))
+
+ # Conversion to string
+
+ def _tzstr(self, sep=":"):
+ """Return formatted timezone offset (+xx:xx) or None."""
+ off = self.utcoffset()
+ if off is not None:
+ if off.days < 0:
+ sign = "-"
+ off = -off
+ else:
+ sign = "+"
+ hh, mm = divmod(off, timedelta(hours=1))
+ assert not mm % timedelta(minutes=1), "whole minute"
+ mm //= timedelta(minutes=1)
+ assert 0 <= hh < 24
+ off = "%s%02d%s%02d" % (sign, hh, sep, mm)
+ return off
+
+ def __repr__(self):
+ """Convert to formal string, for repr()."""
+ if self._microsecond != 0:
+ s = ", %d, %d" % (self._second, self._microsecond)
+ elif self._second != 0:
+ s = ", %d" % self._second
+ else:
+ s = ""
+ s= "%s(%d, %d%s)" % ('datetime.' + self.__class__.__name__,
+ self._hour, self._minute, s)
+ if self._tzinfo is not None:
+ assert s[-1:] == ")"
+ s = s[:-1] + ", tzinfo=%r" % self._tzinfo + ")"
+ return s
+
+ def isoformat(self):
+ """Return the time formatted according to ISO.
+
+ This is 'HH:MM:SS.mmmmmm+zz:zz', or 'HH:MM:SS+zz:zz' if
+ self.microsecond == 0.
+ """
+ s = _format_time(self._hour, self._minute, self._second,
+ self._microsecond)
+ tz = self._tzstr()
+ if tz:
+ s += tz
+ return s
+
+ __str__ = isoformat
+
+ def strftime(self, fmt):
+ """Format using strftime(). The date part of the timestamp passed
+ to underlying strftime should not be used.
+ """
+ # The year must be >= 1000 else Python's strftime implementation
+ # can raise a bogus exception.
+ timetuple = (1900, 1, 1,
+ self._hour, self._minute, self._second,
+ 0, 1, -1)
+ return _wrap_strftime(self, fmt, timetuple)
+
+ def __format__(self, fmt):
+ if len(fmt) != 0:
+ return self.strftime(fmt)
+ return str(self)
+
+ # Timezone functions
+
+ def utcoffset(self):
+ """Return the timezone offset in minutes east of UTC (negative west of
+ UTC)."""
+ if self._tzinfo is None:
+ return None
+ offset = self._tzinfo.utcoffset(None)
+ _check_utc_offset("utcoffset", offset)
+ return offset
+
+ def tzname(self):
+ """Return the timezone name.
+
+ Note that the name is 100% informational -- there's no requirement that
+ it mean anything in particular. For example, "GMT", "UTC", "-500",
+ "-5:00", "EDT", "US/Eastern", "America/New York" are all valid replies.
+ """
+ if self._tzinfo is None:
+ return None
+ name = self._tzinfo.tzname(None)
+ _check_tzname(name)
+ return name
+
+ def dst(self):
+ """Return 0 if DST is not in effect, or the DST offset (in minutes
+ eastward) if DST is in effect.
+
+ This is purely informational; the DST offset has already been added to
+ the UTC offset returned by utcoffset() if applicable, so there's no
+ need to consult dst() unless you're interested in displaying the DST
+ info.
+ """
+ if self._tzinfo is None:
+ return None
+ offset = self._tzinfo.dst(None)
+ _check_utc_offset("dst", offset)
+ return offset
+
+ def replace(self, hour=None, minute=None, second=None, microsecond=None,
+ tzinfo=True):
+ """Return a new time with new values for the specified fields."""
+ if hour is None:
+ hour = self.hour
+ if minute is None:
+ minute = self.minute
+ if second is None:
+ second = self.second
+ if microsecond is None:
+ microsecond = self.microsecond
+ if tzinfo is True:
+ tzinfo = self.tzinfo
+ _check_time_fields(hour, minute, second, microsecond)
+ _check_tzinfo_arg(tzinfo)
+ return time(hour, minute, second, microsecond, tzinfo)
+
+ def __bool__(self):
+ if self.second or self.microsecond:
+ return True
+ offset = self.utcoffset() or timedelta(0)
+ return timedelta(hours=self.hour, minutes=self.minute) != offset
+
+ # Pickle support.
+
+ def _getstate(self):
+ us2, us3 = divmod(self._microsecond, 256)
+ us1, us2 = divmod(us2, 256)
+ basestate = bytes([self._hour, self._minute, self._second,
+ us1, us2, us3])
+ if self._tzinfo is None:
+ return (basestate,)
+ else:
+ return (basestate, self._tzinfo)
+
+ def __setstate(self, string, tzinfo):
+ if len(string) != 6 or string[0] >= 24:
+ raise TypeError("an integer is required")
+ (self._hour, self._minute, self._second,
+ us1, us2, us3) = string
+ self._microsecond = (((us1 << 8) | us2) << 8) | us3
+ if tzinfo is None or isinstance(tzinfo, _tzinfo_class):
+ self._tzinfo = tzinfo
+ else:
+ raise TypeError("bad tzinfo state arg %r" % tzinfo)
+
+ def __reduce__(self):
+ return (time, self._getstate())
+
+_time_class = time # so functions w/ args named "time" can get at the class
+
+time.min = time(0, 0, 0)
+time.max = time(23, 59, 59, 999999)
+time.resolution = timedelta(microseconds=1)
+
+class datetime(date):
+ """datetime(year, month, day[, hour[, minute[, second[, microsecond[,tzinfo]]]]])
+
+ The year, month and day arguments are required. tzinfo may be None, or an
+ instance of a tzinfo subclass. The remaining arguments may be ints.
+ """
+
+ __slots__ = date.__slots__ + (
+ '_hour', '_minute', '_second',
+ '_microsecond', '_tzinfo')
+ def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0,
+ microsecond=0, tzinfo=None):
+ if isinstance(year, bytes) and len(year) == 10:
+ # Pickle support
+ self = date.__new__(cls, year[:4])
+ self.__setstate(year, month)
+ return self
+ _check_tzinfo_arg(tzinfo)
+ _check_time_fields(hour, minute, second, microsecond)
+ self = date.__new__(cls, year, month, day)
+ self._hour = hour
+ self._minute = minute
+ self._second = second
+ self._microsecond = microsecond
+ self._tzinfo = tzinfo
+ return self
+
+ # Read-only field accessors
+ @property
+ def hour(self):
+ """hour (0-23)"""
+ return self._hour
+
+ @property
+ def minute(self):
+ """minute (0-59)"""
+ return self._minute
+
+ @property
+ def second(self):
+ """second (0-59)"""
+ return self._second
+
+ @property
+ def microsecond(self):
+ """microsecond (0-999999)"""
+ return self._microsecond
+
+ @property
+ def tzinfo(self):
+ """timezone info object"""
+ return self._tzinfo
+
+ @classmethod
+ def fromtimestamp(cls, t, tz=None):
+ """Construct a datetime from a POSIX timestamp (like time.time()).
+
+ A timezone info object may be passed in as well.
+ """
+
+ _check_tzinfo_arg(tz)
+
+ converter = _time.localtime if tz is None else _time.gmtime
+
+ t, frac = divmod(t, 1.0)
+ us = int(frac * 1e6)
+
+ # If timestamp is less than one microsecond smaller than a
+ # full second, us can be rounded up to 1000000. In this case,
+ # roll over to seconds, otherwise, ValueError is raised
+ # by the constructor.
+ if us == 1000000:
+ t += 1
+ us = 0
+ y, m, d, hh, mm, ss, weekday, jday, dst = converter(t)
+ ss = min(ss, 59) # clamp out leap seconds if the platform has them
+ result = cls(y, m, d, hh, mm, ss, us, tz)
+ if tz is not None:
+ result = tz.fromutc(result)
+ return result
+
+ @classmethod
+ def utcfromtimestamp(cls, t):
+ "Construct a UTC datetime from a POSIX timestamp (like time.time())."
+ t, frac = divmod(t, 1.0)
+ us = int(frac * 1e6)
+
+ # If timestamp is less than one microsecond smaller than a
+ # full second, us can be rounded up to 1000000. In this case,
+ # roll over to seconds, otherwise, ValueError is raised
+ # by the constructor.
+ if us == 1000000:
+ t += 1
+ us = 0
+ y, m, d, hh, mm, ss, weekday, jday, dst = _time.gmtime(t)
+ ss = min(ss, 59) # clamp out leap seconds if the platform has them
+ return cls(y, m, d, hh, mm, ss, us)
+
+ # XXX This is supposed to do better than we *can* do by using time.time(),
+ # XXX if the platform supports a more accurate way. The C implementation
+ # XXX uses gettimeofday on platforms that have it, but that isn't
+ # XXX available from Python. So now() may return different results
+ # XXX across the implementations.
+ @classmethod
+ def now(cls, tz=None):
+ "Construct a datetime from time.time() and optional time zone info."
+ t = _time.time()
+ return cls.fromtimestamp(t, tz)
+
+ @classmethod
+ def utcnow(cls):
+ "Construct a UTC datetime from time.time()."
+ t = _time.time()
+ return cls.utcfromtimestamp(t)
+
+ @classmethod
+ def combine(cls, date, time):
+ "Construct a datetime from a given date and a given time."
+ if not isinstance(date, _date_class):
+ raise TypeError("date argument must be a date instance")
+ if not isinstance(time, _time_class):
+ raise TypeError("time argument must be a time instance")
+ return cls(date.year, date.month, date.day,
+ time.hour, time.minute, time.second, time.microsecond,
+ time.tzinfo)
+
+ def timetuple(self):
+ "Return local time tuple compatible with time.localtime()."
+ dst = self.dst()
+ if dst is None:
+ dst = -1
+ elif dst:
+ dst = 1
+ else:
+ dst = 0
+ return _build_struct_time(self.year, self.month, self.day,
+ self.hour, self.minute, self.second,
+ dst)
+
+ def timestamp(self):
+ "Return POSIX timestamp as float"
+ if self._tzinfo is None:
+ return _time.mktime((self.year, self.month, self.day,
+ self.hour, self.minute, self.second,
+ -1, -1, -1)) + self.microsecond / 1e6
+ else:
+ return (self - _EPOCH).total_seconds()
+
+ def utctimetuple(self):
+ "Return UTC time tuple compatible with time.gmtime()."
+ offset = self.utcoffset()
+ if offset:
+ self -= offset
+ y, m, d = self.year, self.month, self.day
+ hh, mm, ss = self.hour, self.minute, self.second
+ return _build_struct_time(y, m, d, hh, mm, ss, 0)
+
+ def date(self):
+ "Return the date part."
+ return date(self._year, self._month, self._day)
+
+ def time(self):
+ "Return the time part, with tzinfo None."
+ return time(self.hour, self.minute, self.second, self.microsecond)
+
+ def timetz(self):
+ "Return the time part, with same tzinfo."
+ return time(self.hour, self.minute, self.second, self.microsecond,
+ self._tzinfo)
+
+ def replace(self, year=None, month=None, day=None, hour=None,
+ minute=None, second=None, microsecond=None, tzinfo=True):
+ """Return a new datetime with new values for the specified fields."""
+ if year is None:
+ year = self.year
+ if month is None:
+ month = self.month
+ if day is None:
+ day = self.day
+ if hour is None:
+ hour = self.hour
+ if minute is None:
+ minute = self.minute
+ if second is None:
+ second = self.second
+ if microsecond is None:
+ microsecond = self.microsecond
+ if tzinfo is True:
+ tzinfo = self.tzinfo
+ _check_date_fields(year, month, day)
+ _check_time_fields(hour, minute, second, microsecond)
+ _check_tzinfo_arg(tzinfo)
+ return datetime(year, month, day, hour, minute, second,
+ microsecond, tzinfo)
+
+ def astimezone(self, tz=None):
+ if tz is None:
+ if self.tzinfo is None:
+ raise ValueError("astimezone() requires an aware datetime")
+ ts = (self - _EPOCH) // timedelta(seconds=1)
+ localtm = _time.localtime(ts)
+ local = datetime(*localtm[:6])
+ try:
+ # Extract TZ data if available
+ gmtoff = localtm.tm_gmtoff
+ zone = localtm.tm_zone
+ except AttributeError:
+ # Compute UTC offset and compare with the value implied
+ # by tm_isdst. If the values match, use the zone name
+ # implied by tm_isdst.
+ delta = local - datetime(*_time.gmtime(ts)[:6])
+ dst = _time.daylight and localtm.tm_isdst > 0
+ gmtoff = -(_time.altzone if dst else _time.timezone)
+ if delta == timedelta(seconds=gmtoff):
+ tz = timezone(delta, _time.tzname[dst])
+ else:
+ tz = timezone(delta)
+ else:
+ tz = timezone(timedelta(seconds=gmtoff), zone)
+
+ elif not isinstance(tz, tzinfo):
+ raise TypeError("tz argument must be an instance of tzinfo")
+
+ mytz = self.tzinfo
+ if mytz is None:
+ raise ValueError("astimezone() requires an aware datetime")
+
+ if tz is mytz:
+ return self
+
+ # Convert self to UTC, and attach the new time zone object.
+ myoffset = self.utcoffset()
+ if myoffset is None:
+ raise ValueError("astimezone() requires an aware datetime")
+ utc = (self - myoffset).replace(tzinfo=tz)
+
+ # Convert from UTC to tz's local time.
+ return tz.fromutc(utc)
+
+ # Ways to produce a string.
+
+ def ctime(self):
+ "Return ctime() style string."
+ weekday = self.toordinal() % 7 or 7
+ return "%s %s %2d %02d:%02d:%02d %04d" % (
+ _DAYNAMES[weekday],
+ _MONTHNAMES[self._month],
+ self._day,
+ self._hour, self._minute, self._second,
+ self._year)
+
+ def isoformat(self, sep='T'):
+ """Return the time formatted according to ISO.
+
+ This is 'YYYY-MM-DD HH:MM:SS.mmmmmm', or 'YYYY-MM-DD HH:MM:SS' if
+ self.microsecond == 0.
+
+ If self.tzinfo is not None, the UTC offset is also attached, giving
+ 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM' or 'YYYY-MM-DD HH:MM:SS+HH:MM'.
+
+ Optional argument sep specifies the separator between date and
+ time, default 'T'.
+ """
+ s = ("%04d-%02d-%02d%c" % (self._year, self._month, self._day,
+ sep) +
+ _format_time(self._hour, self._minute, self._second,
+ self._microsecond))
+ off = self.utcoffset()
+ if off is not None:
+ if off.days < 0:
+ sign = "-"
+ off = -off
+ else:
+ sign = "+"
+ hh, mm = divmod(off, timedelta(hours=1))
+ assert not mm % timedelta(minutes=1), "whole minute"
+ mm //= timedelta(minutes=1)
+ s += "%s%02d:%02d" % (sign, hh, mm)
+ return s
+
+ def __repr__(self):
+ """Convert to formal string, for repr()."""
+ L = [self._year, self._month, self._day, # These are never zero
+ self._hour, self._minute, self._second, self._microsecond]
+ if L[-1] == 0:
+ del L[-1]
+ if L[-1] == 0:
+ del L[-1]
+ s = ", ".join(map(str, L))
+ s = "%s(%s)" % ('datetime.' + self.__class__.__name__, s)
+ if self._tzinfo is not None:
+ assert s[-1:] == ")"
+ s = s[:-1] + ", tzinfo=%r" % self._tzinfo + ")"
+ return s
+
+ def __str__(self):
+ "Convert to string, for str()."
+ return self.isoformat(sep=' ')
+
+ @classmethod
+ def strptime(cls, date_string, format):
+ 'string, format -> new datetime parsed from a string (like time.strptime()).'
+ import _strptime
+ return _strptime._strptime_datetime(cls, date_string, format)
+
+ def utcoffset(self):
+ """Return the timezone offset in minutes east of UTC (negative west of
+ UTC)."""
+ if self._tzinfo is None:
+ return None
+ offset = self._tzinfo.utcoffset(self)
+ _check_utc_offset("utcoffset", offset)
+ return offset
+
+ def tzname(self):
+ """Return the timezone name.
+
+ Note that the name is 100% informational -- there's no requirement that
+ it mean anything in particular. For example, "GMT", "UTC", "-500",
+ "-5:00", "EDT", "US/Eastern", "America/New York" are all valid replies.
+ """
+ name = _call_tzinfo_method(self._tzinfo, "tzname", self)
+ _check_tzname(name)
+ return name
+
+ def dst(self):
+ """Return 0 if DST is not in effect, or the DST offset (in minutes
+ eastward) if DST is in effect.
+
+ This is purely informational; the DST offset has already been added to
+ the UTC offset returned by utcoffset() if applicable, so there's no
+ need to consult dst() unless you're interested in displaying the DST
+ info.
+ """
+ if self._tzinfo is None:
+ return None
+ offset = self._tzinfo.dst(self)
+ _check_utc_offset("dst", offset)
+ return offset
+
+ # Comparisons of datetime objects with other.
+
+ def __eq__(self, other):
+ if isinstance(other, datetime):
+ return self._cmp(other, allow_mixed=True) == 0
+ elif not isinstance(other, date):
+ return NotImplemented
+ else:
+ return False
+
+ def __ne__(self, other):
+ if isinstance(other, datetime):
+ return self._cmp(other, allow_mixed=True) != 0
+ elif not isinstance(other, date):
+ return NotImplemented
+ else:
+ return True
+
+ def __le__(self, other):
+ if isinstance(other, datetime):
+ return self._cmp(other) <= 0
+ elif not isinstance(other, date):
+ return NotImplemented
+ else:
+ _cmperror(self, other)
+
+ def __lt__(self, other):
+ if isinstance(other, datetime):
+ return self._cmp(other) < 0
+ elif not isinstance(other, date):
+ return NotImplemented
+ else:
+ _cmperror(self, other)
+
+ def __ge__(self, other):
+ if isinstance(other, datetime):
+ return self._cmp(other) >= 0
+ elif not isinstance(other, date):
+ return NotImplemented
+ else:
+ _cmperror(self, other)
+
+ def __gt__(self, other):
+ if isinstance(other, datetime):
+ return self._cmp(other) > 0
+ elif not isinstance(other, date):
+ return NotImplemented
+ else:
+ _cmperror(self, other)
+
+ def _cmp(self, other, allow_mixed=False):
+ assert isinstance(other, datetime)
+ mytz = self._tzinfo
+ ottz = other._tzinfo
+ myoff = otoff = None
+
+ if mytz is ottz:
+ base_compare = True
+ else:
+ myoff = self.utcoffset()
+ otoff = other.utcoffset()
+ base_compare = myoff == otoff
+
+ if base_compare:
+ return _cmp((self._year, self._month, self._day,
+ self._hour, self._minute, self._second,
+ self._microsecond),
+ (other._year, other._month, other._day,
+ other._hour, other._minute, other._second,
+ other._microsecond))
+ if myoff is None or otoff is None:
+ if allow_mixed:
+ return 2 # arbitrary non-zero value
+ else:
+ raise TypeError("cannot compare naive and aware datetimes")
+ # XXX What follows could be done more efficiently...
+ diff = self - other # this will take offsets into account
+ if diff.days < 0:
+ return -1
+ return diff and 1 or 0
+
+ def __add__(self, other):
+ "Add a datetime and a timedelta."
+ if not isinstance(other, timedelta):
+ return NotImplemented
+ delta = timedelta(self.toordinal(),
+ hours=self._hour,
+ minutes=self._minute,
+ seconds=self._second,
+ microseconds=self._microsecond)
+ delta += other
+ hour, rem = divmod(delta.seconds, 3600)
+ minute, second = divmod(rem, 60)
+ if 0 < delta.days <= _MAXORDINAL:
+ return datetime.combine(date.fromordinal(delta.days),
+ time(hour, minute, second,
+ delta.microseconds,
+ tzinfo=self._tzinfo))
+ raise OverflowError("result out of range")
+
+ __radd__ = __add__
+
+ def __sub__(self, other):
+ "Subtract two datetimes, or a datetime and a timedelta."
+ if not isinstance(other, datetime):
+ if isinstance(other, timedelta):
+ return self + -other
+ return NotImplemented
+
+ days1 = self.toordinal()
+ days2 = other.toordinal()
+ secs1 = self._second + self._minute * 60 + self._hour * 3600
+ secs2 = other._second + other._minute * 60 + other._hour * 3600
+ base = timedelta(days1 - days2,
+ secs1 - secs2,
+ self._microsecond - other._microsecond)
+ if self._tzinfo is other._tzinfo:
+ return base
+ myoff = self.utcoffset()
+ otoff = other.utcoffset()
+ if myoff == otoff:
+ return base
+ if myoff is None or otoff is None:
+ raise TypeError("cannot mix naive and timezone-aware time")
+ return base + otoff - myoff
+
+ def __hash__(self):
+ tzoff = self.utcoffset()
+ if tzoff is None:
+ return hash(self._getstate()[0])
+ days = _ymd2ord(self.year, self.month, self.day)
+ seconds = self.hour * 3600 + self.minute * 60 + self.second
+ return hash(timedelta(days, seconds, self.microsecond) - tzoff)
+
+ # Pickle support.
+
+ def _getstate(self):
+ yhi, ylo = divmod(self._year, 256)
+ us2, us3 = divmod(self._microsecond, 256)
+ us1, us2 = divmod(us2, 256)
+ basestate = bytes([yhi, ylo, self._month, self._day,
+ self._hour, self._minute, self._second,
+ us1, us2, us3])
+ if self._tzinfo is None:
+ return (basestate,)
+ else:
+ return (basestate, self._tzinfo)
+
+ def __setstate(self, string, tzinfo):
+ (yhi, ylo, self._month, self._day, self._hour,
+ self._minute, self._second, us1, us2, us3) = string
+ self._year = yhi * 256 + ylo
+ self._microsecond = (((us1 << 8) | us2) << 8) | us3
+ if tzinfo is None or isinstance(tzinfo, _tzinfo_class):
+ self._tzinfo = tzinfo
+ else:
+ raise TypeError("bad tzinfo state arg %r" % tzinfo)
+
+ def __reduce__(self):
+ return (self.__class__, self._getstate())
+
+
+datetime.min = datetime(1, 1, 1)
+datetime.max = datetime(9999, 12, 31, 23, 59, 59, 999999)
+datetime.resolution = timedelta(microseconds=1)
+
+
+def _isoweek1monday(year):
+ # Helper to calculate the day number of the Monday starting week 1
+ # XXX This could be done more efficiently
+ THURSDAY = 3
+ firstday = _ymd2ord(year, 1, 1)
+ firstweekday = (firstday + 6) % 7 # See weekday() above
+ week1monday = firstday - firstweekday
+ if firstweekday > THURSDAY:
+ week1monday += 7
+ return week1monday
+
+class timezone(tzinfo):
+ __slots__ = '_offset', '_name'
+
+ # Sentinel value to disallow None
+ _Omitted = object()
+ def __new__(cls, offset, name=_Omitted):
+ if not isinstance(offset, timedelta):
+ raise TypeError("offset must be a timedelta")
+ if name is cls._Omitted:
+ if not offset:
+ return cls.utc
+ name = None
+ elif not isinstance(name, str):
+ ###
+ # For Python-Future:
+ if PY2 and isinstance(name, native_str):
+ name = name.decode()
+ else:
+ raise TypeError("name must be a string")
+ ###
+ if not cls._minoffset <= offset <= cls._maxoffset:
+ raise ValueError("offset must be a timedelta"
+ " strictly between -timedelta(hours=24) and"
+ " timedelta(hours=24).")
+ if (offset.microseconds != 0 or
+ offset.seconds % 60 != 0):
+ raise ValueError("offset must be a timedelta"
+ " representing a whole number of minutes")
+ return cls._create(offset, name)
+
+ @classmethod
+ def _create(cls, offset, name=None):
+ self = tzinfo.__new__(cls)
+ self._offset = offset
+ self._name = name
+ return self
+
+ def __getinitargs__(self):
+ """pickle support"""
+ if self._name is None:
+ return (self._offset,)
+ return (self._offset, self._name)
+
+ def __eq__(self, other):
+ if type(other) != timezone:
+ return False
+ return self._offset == other._offset
+
+ def __hash__(self):
+ return hash(self._offset)
+
+ def __repr__(self):
+ """Convert to formal string, for repr().
+
+ >>> tz = timezone.utc
+ >>> repr(tz)
+ 'datetime.timezone.utc'
+ >>> tz = timezone(timedelta(hours=-5), 'EST')
+ >>> repr(tz)
+ "datetime.timezone(datetime.timedelta(-1, 68400), 'EST')"
+ """
+ if self is self.utc:
+ return 'datetime.timezone.utc'
+ if self._name is None:
+ return "%s(%r)" % ('datetime.' + self.__class__.__name__,
+ self._offset)
+ return "%s(%r, %r)" % ('datetime.' + self.__class__.__name__,
+ self._offset, self._name)
+
+ def __str__(self):
+ return self.tzname(None)
+
+ def utcoffset(self, dt):
+ if isinstance(dt, datetime) or dt is None:
+ return self._offset
+ raise TypeError("utcoffset() argument must be a datetime instance"
+ " or None")
+
+ def tzname(self, dt):
+ if isinstance(dt, datetime) or dt is None:
+ if self._name is None:
+ return self._name_from_offset(self._offset)
+ return self._name
+ raise TypeError("tzname() argument must be a datetime instance"
+ " or None")
+
+ def dst(self, dt):
+ if isinstance(dt, datetime) or dt is None:
+ return None
+ raise TypeError("dst() argument must be a datetime instance"
+ " or None")
+
+ def fromutc(self, dt):
+ if isinstance(dt, datetime):
+ if dt.tzinfo is not self:
+ raise ValueError("fromutc: dt.tzinfo "
+ "is not self")
+ return dt + self._offset
+ raise TypeError("fromutc() argument must be a datetime instance"
+ " or None")
+
+ _maxoffset = timedelta(hours=23, minutes=59)
+ _minoffset = -_maxoffset
+
+ @staticmethod
+ def _name_from_offset(delta):
+ if delta < timedelta(0):
+ sign = '-'
+ delta = -delta
+ else:
+ sign = '+'
+ hours, rest = divmod(delta, timedelta(hours=1))
+ minutes = rest // timedelta(minutes=1)
+ return 'UTC{}{:02d}:{:02d}'.format(sign, hours, minutes)
+
+timezone.utc = timezone._create(timedelta(0))
+timezone.min = timezone._create(timezone._minoffset)
+timezone.max = timezone._create(timezone._maxoffset)
+_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)
+"""
+Some time zone algebra. For a datetime x, let
+ x.n = x stripped of its timezone -- its naive time.
+ x.o = x.utcoffset(), and assuming that doesn't raise an exception or
+ return None
+ x.d = x.dst(), and assuming that doesn't raise an exception or
+ return None
+ x.s = x's standard offset, x.o - x.d
+
+Now some derived rules, where k is a duration (timedelta).
+
+1. x.o = x.s + x.d
+ This follows from the definition of x.s.
+
+2. If x and y have the same tzinfo member, x.s = y.s.
+ This is actually a requirement, an assumption we need to make about
+ sane tzinfo classes.
+
+3. The naive UTC time corresponding to x is x.n - x.o.
+ This is again a requirement for a sane tzinfo class.
+
+4. (x+k).s = x.s
+ This follows from #2, and that datetimetz+timedelta preserves tzinfo.
+
+5. (x+k).n = x.n + k
+ Again follows from how arithmetic is defined.
+
+Now we can explain tz.fromutc(x). Let's assume it's an interesting case
+(meaning that the various tzinfo methods exist, and don't blow up or return
+None when called).
+
+The function wants to return a datetime y with timezone tz, equivalent to x.
+x is already in UTC.
+
+By #3, we want
+
+ y.n - y.o = x.n [1]
+
+The algorithm starts by attaching tz to x.n, and calling that y. So
+x.n = y.n at the start. Then it wants to add a duration k to y, so that [1]
+becomes true; in effect, we want to solve [2] for k:
+
+ (y+k).n - (y+k).o = x.n [2]
+
+By #1, this is the same as
+
+ (y+k).n - ((y+k).s + (y+k).d) = x.n [3]
+
+By #5, (y+k).n = y.n + k, which equals x.n + k because x.n=y.n at the start.
+Substituting that into [3],
+
+ x.n + k - (y+k).s - (y+k).d = x.n; the x.n terms cancel, leaving
+ k - (y+k).s - (y+k).d = 0; rearranging,
+ k = (y+k).s - (y+k).d; by #4, (y+k).s == y.s, so
+ k = y.s - (y+k).d
+
+On the RHS, (y+k).d can't be computed directly, but y.s can be, and we
+approximate k by ignoring the (y+k).d term at first. Note that k can't be
+very large, since all offset-returning methods return a duration of magnitude
+less than 24 hours. For that reason, if y is firmly in std time, (y+k).d must
+be 0, so ignoring it has no consequence then.
+
+In any case, the new value is
+
+ z = y + y.s [4]
+
+It's helpful to step back and look at [4] from a higher level: it's simply
+mapping from UTC to tz's standard time.
+
+At this point, if
+
+ z.n - z.o = x.n [5]
+
+we have an equivalent time, and are almost done. The insecurity here is
+at the start of daylight time. Picture US Eastern for concreteness. The wall
+time jumps from 1:59 to 3:00, and wall hours of the form 2:MM don't make good
+sense then. The docs ask that an Eastern tzinfo class consider such a time to
+be EDT (because it's "after 2"), which is a redundant spelling of 1:MM EST
+on the day DST starts. We want to return the 1:MM EST spelling because that's
+the only spelling that makes sense on the local wall clock.
+
+In fact, if [5] holds at this point, we do have the standard-time spelling,
+but that takes a bit of proof. We first prove a stronger result. What's the
+difference between the LHS and RHS of [5]? Let
+
+ diff = x.n - (z.n - z.o) [6]
+
+Now
+ z.n = by [4]
+ (y + y.s).n = by #5
+ y.n + y.s = since y.n = x.n
+ x.n + y.s = since z and y have the same tzinfo member,
+ y.s = z.s by #2
+ x.n + z.s
+
+Plugging that back into [6] gives
+
+ diff =
+ x.n - ((x.n + z.s) - z.o) = expanding
+ x.n - x.n - z.s + z.o = cancelling
+ - z.s + z.o = by #2
+ z.d
+
+So diff = z.d.
+
+If [5] is true now, diff = 0, so z.d = 0 too, and we have the standard-time
+spelling we wanted in the endcase described above. We're done. Contrarily,
+if z.d = 0, then we have a UTC equivalent, and are also done.
+
+If [5] is not true now, diff = z.d != 0, and z.d is the offset we need to
+add to z (in effect, z is in tz's standard time, and we need to shift the
+local clock into tz's daylight time).
+
+Let
+
+ z' = z + z.d = z + diff [7]
+
+and we can again ask whether
+
+ z'.n - z'.o = x.n [8]
+
+If so, we're done. If not, the tzinfo class is insane, according to the
+assumptions we've made. This also requires a bit of proof. As before, let's
+compute the difference between the LHS and RHS of [8] (and skipping some of
+the justifications for the kinds of substitutions we've done several times
+already):
+
+ diff' = x.n - (z'.n - z'.o) = replacing z'.n via [7]
+ x.n - (z.n + diff - z'.o) = replacing diff via [6]
+ x.n - (z.n + x.n - (z.n - z.o) - z'.o) =
+ x.n - z.n - x.n + z.n - z.o + z'.o = cancel x.n
+ - z.n + z.n - z.o + z'.o = cancel z.n
+ - z.o + z'.o = #1 twice
+ -z.s - z.d + z'.s + z'.d = z and z' have same tzinfo
+ z'.d - z.d
+
+So z' is UTC-equivalent to x iff z'.d = z.d at this point. If they are equal,
+we've found the UTC-equivalent so are done. In fact, we stop with [7] and
+return z', not bothering to compute z'.d.
+
+How could z.d and z'.d differ? z' = z + z.d [7], so merely moving z' by
+a dst() offset, and starting *from* a time already in DST (we know z.d != 0),
+would have to change the result dst() returns: we start in DST, and moving
+a little further into it takes us out of DST.
+
+There isn't a sane case where this can happen. The closest it gets is at
+the end of DST, where there's an hour in UTC with no spelling in a hybrid
+tzinfo class. In US Eastern, that's 5:MM UTC = 0:MM EST = 1:MM EDT. During
+that hour, on an Eastern clock 1:MM is taken as being in standard time (6:MM
+UTC) because the docs insist on that, but 0:MM is taken as being in daylight
+time (4:MM UTC). There is no local time mapping to 5:MM UTC. The local
+clock jumps from 1:59 back to 1:00 again, and repeats the 1:MM hour in
+standard time. Since that's what the local clock *does*, we want to map both
+UTC hours 5:MM and 6:MM to 1:MM Eastern. The result is ambiguous
+in local time, but so it goes -- it's the way the local clock works.
+
+When x = 5:MM UTC is the input to this algorithm, x.o=0, y.o=-5 and y.d=0,
+so z=0:MM. z.d=60 (minutes) then, so [5] doesn't hold and we keep going.
+z' = z + z.d = 1:MM then, and z'.d=0, and z'.d - z.d = -60 != 0 so [8]
+(correctly) concludes that z' is not UTC-equivalent to x.
+
+Because we know z.d said z was in daylight time (else [5] would have held and
+we would have stopped then), and we know z.d != z'.d (else [8] would have held
+and we have stopped then), and there are only 2 possible values dst() can
+return in Eastern, it follows that z'.d must be 0 (which it is in the example,
+but the reasoning doesn't depend on the example -- it depends on there being
+two possible dst() outcomes, one zero and the other non-zero). Therefore
+z' must be in standard time, and is the spelling we want in this case.
+
+Note again that z' is not UTC-equivalent as far as the hybrid tzinfo class is
+concerned (because it takes z' as being in standard time rather than the
+daylight time we intend here), but returning it gives the real-life "local
+clock repeats an hour" behavior when mapping the "unspellable" UTC hour into
+tz.
+
+When the input is 6:MM, z=1:MM and z.d=0, and we stop at once, again with
+the 1:MM standard time spelling we want.
+
+So how can this break? One of the assumptions must be violated. Two
+possibilities:
+
+1) [2] effectively says that y.s is invariant across all y belonging to a given
+ time zone. This isn't true if, for political reasons or continental drift,
+ a region decides to change its base offset from UTC.
+
+2) There may be versions of "double daylight" time where the tail end of
+ the analysis gives up a step too early. I haven't thought about that
+ enough to say.
+
+In any case, it's clear that the default fromutc() is strong enough to handle
+"almost all" time zones: so long as the standard offset is invariant, it
+doesn't matter if daylight time transition points change from year to year, or
+if daylight time is skipped in some years; it doesn't matter how large or
+small dst() may get within its bounds; and it doesn't even matter if some
+perverse time zone returns a negative dst()). So a breaking case must be
+pretty bizarre, and a tzinfo subclass can override fromutc() if it is.
+"""
+try:
+ from _datetime import *
+except ImportError:
+ pass
+else:
+ # Clean up unused names
+ del (_DAYNAMES, _DAYS_BEFORE_MONTH, _DAYS_IN_MONTH,
+ _DI100Y, _DI400Y, _DI4Y, _MAXORDINAL, _MONTHNAMES,
+ _build_struct_time, _call_tzinfo_method, _check_date_fields,
+ _check_time_fields, _check_tzinfo_arg, _check_tzname,
+ _check_utc_offset, _cmp, _cmperror, _date_class, _days_before_month,
+ _days_before_year, _days_in_month, _format_time, _is_leap,
+ _isoweek1monday, _math, _ord2ymd, _time, _time_class, _tzinfo_class,
+ _wrap_strftime, _ymd2ord)
+ # XXX Since import * above excludes names that start with _,
+ # docstring does not get overwritten. In the future, it may be
+ # appropriate to maintain a single module level docstring and
+ # remove the following line.
+ from _datetime import __doc__
diff --git a/src/future/backports/email/__init__.py b/src/future/backports/email/__init__.py
new file mode 100644
index 00000000..f9523bc1
--- /dev/null
+++ b/src/future/backports/email/__init__.py
@@ -0,0 +1,78 @@
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""
+Backport of the Python 3.3 email package for Python-Future.
+
+A package for parsing, handling, and generating email messages.
+"""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
+# Install the surrogate escape handler here because this is used by many
+# modules in the email package.
+from future.utils import surrogateescape
+surrogateescape.register_surrogateescape()
+# (Should this be done globally by ``future``?)
+
+
+__version__ = '5.1.0'
+
+__all__ = [
+ 'base64mime',
+ 'charset',
+ 'encoders',
+ 'errors',
+ 'feedparser',
+ 'generator',
+ 'header',
+ 'iterators',
+ 'message',
+ 'message_from_file',
+ 'message_from_binary_file',
+ 'message_from_string',
+ 'message_from_bytes',
+ 'mime',
+ 'parser',
+ 'quoprimime',
+ 'utils',
+ ]
+
+
+
+# Some convenience routines. Don't import Parser and Message as side-effects
+# of importing email since those cascadingly import most of the rest of the
+# email package.
+def message_from_string(s, *args, **kws):
+ """Parse a string into a Message object model.
+
+ Optional _class and strict are passed to the Parser constructor.
+ """
+ from future.backports.email.parser import Parser
+ return Parser(*args, **kws).parsestr(s)
+
+def message_from_bytes(s, *args, **kws):
+ """Parse a bytes string into a Message object model.
+
+ Optional _class and strict are passed to the Parser constructor.
+ """
+ from future.backports.email.parser import BytesParser
+ return BytesParser(*args, **kws).parsebytes(s)
+
+def message_from_file(fp, *args, **kws):
+ """Read a file and parse its contents into a Message object model.
+
+ Optional _class and strict are passed to the Parser constructor.
+ """
+ from future.backports.email.parser import Parser
+ return Parser(*args, **kws).parse(fp)
+
+def message_from_binary_file(fp, *args, **kws):
+ """Read a binary file and parse its contents into a Message object model.
+
+ Optional _class and strict are passed to the Parser constructor.
+ """
+ from future.backports.email.parser import BytesParser
+ return BytesParser(*args, **kws).parse(fp)
diff --git a/src/future/backports/email/_encoded_words.py b/src/future/backports/email/_encoded_words.py
new file mode 100644
index 00000000..7c4a5291
--- /dev/null
+++ b/src/future/backports/email/_encoded_words.py
@@ -0,0 +1,232 @@
+""" Routines for manipulating RFC2047 encoded words.
+
+This is currently a package-private API, but will be considered for promotion
+to a public API if there is demand.
+
+"""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import bytes
+from future.builtins import chr
+from future.builtins import int
+from future.builtins import str
+
+# An encoded word looks like this:
+#
+# =?charset[*lang]?cte?encoded_string?=
+#
+# for more information about charset see the charset module. Here it is one
+# of the preferred MIME charset names (hopefully; you never know when parsing).
+# cte (Content Transfer Encoding) is either 'q' or 'b' (ignoring case). In
+# theory other letters could be used for other encodings, but in practice this
+# (almost?) never happens. There could be a public API for adding entries
+# to the CTE tables, but YAGNI for now. 'q' is Quoted Printable, 'b' is
+# Base64. The meaning of encoded_string should be obvious. 'lang' is optional
+# as indicated by the brackets (they are not part of the syntax) but is almost
+# never encountered in practice.
+#
+# The general interface for a CTE decoder is that it takes the encoded_string
+# as its argument, and returns a tuple (cte_decoded_string, defects). The
+# cte_decoded_string is the original binary that was encoded using the
+# specified cte. 'defects' is a list of MessageDefect instances indicating any
+# problems encountered during conversion. 'charset' and 'lang' are the
+# corresponding strings extracted from the EW, case preserved.
+#
+# The general interface for a CTE encoder is that it takes a binary sequence
+# as input and returns the cte_encoded_string, which is an ascii-only string.
+#
+# Each decoder must also supply a length function that takes the binary
+# sequence as its argument and returns the length of the resulting encoded
+# string.
+#
+# The main API functions for the module are decode, which calls the decoder
+# referenced by the cte specifier, and encode, which adds the appropriate
+# RFC 2047 "chrome" to the encoded string, and can optionally automatically
+# select the shortest possible encoding. See their docstrings below for
+# details.
+
+import re
+import base64
+import binascii
+import functools
+from string import ascii_letters, digits
+from future.backports.email import errors
+
+__all__ = ['decode_q',
+ 'encode_q',
+ 'decode_b',
+ 'encode_b',
+ 'len_q',
+ 'len_b',
+ 'decode',
+ 'encode',
+ ]
+
+#
+# Quoted Printable
+#
+
+# regex based decoder.
+_q_byte_subber = functools.partial(re.compile(br'=([a-fA-F0-9]{2})').sub,
+ lambda m: bytes([int(m.group(1), 16)]))
+
+def decode_q(encoded):
+ encoded = bytes(encoded.replace(b'_', b' '))
+ return _q_byte_subber(encoded), []
+
+
+# dict mapping bytes to their encoded form
+class _QByteMap(dict):
+
+ safe = bytes(b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii'))
+
+ def __missing__(self, key):
+ if key in self.safe:
+ self[key] = chr(key)
+ else:
+ self[key] = "={:02X}".format(key)
+ return self[key]
+
+_q_byte_map = _QByteMap()
+
+# In headers spaces are mapped to '_'.
+_q_byte_map[ord(' ')] = '_'
+
+def encode_q(bstring):
+ return str(''.join(_q_byte_map[x] for x in bytes(bstring)))
+
+def len_q(bstring):
+ return sum(len(_q_byte_map[x]) for x in bytes(bstring))
+
+
+#
+# Base64
+#
+
+def decode_b(encoded):
+ defects = []
+ pad_err = len(encoded) % 4
+ if pad_err:
+ defects.append(errors.InvalidBase64PaddingDefect())
+ padded_encoded = encoded + b'==='[:4-pad_err]
+ else:
+ padded_encoded = encoded
+ try:
+ # The validate kwarg to b64decode is not supported in Py2.x
+ if not re.match(b'^[A-Za-z0-9+/]*={0,2}$', padded_encoded):
+ raise binascii.Error('Non-base64 digit found')
+ return base64.b64decode(padded_encoded), defects
+ except binascii.Error:
+ # Since we had correct padding, this must be an invalid char error.
+ defects = [errors.InvalidBase64CharactersDefect()]
+ # The non-alphabet characters are ignored as far as padding
+ # goes, but we don't know how many there are. So we'll just
+ # try various padding lengths until something works.
+ for i in 0, 1, 2, 3:
+ try:
+ return base64.b64decode(encoded+b'='*i), defects
+ except (binascii.Error, TypeError): # Py2 raises a TypeError
+ if i==0:
+ defects.append(errors.InvalidBase64PaddingDefect())
+ else:
+ # This should never happen.
+ raise AssertionError("unexpected binascii.Error")
+
+def encode_b(bstring):
+ return base64.b64encode(bstring).decode('ascii')
+
+def len_b(bstring):
+ groups_of_3, leftover = divmod(len(bstring), 3)
+ # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
+ return groups_of_3 * 4 + (4 if leftover else 0)
+
+
+_cte_decoders = {
+ 'q': decode_q,
+ 'b': decode_b,
+ }
+
+def decode(ew):
+ """Decode encoded word and return (string, charset, lang, defects) tuple.
+
+ An RFC 2047/2243 encoded word has the form:
+
+ =?charset*lang?cte?encoded_string?=
+
+ where '*lang' may be omitted but the other parts may not be.
+
+ This function expects exactly such a string (that is, it does not check the
+ syntax and may raise errors if the string is not well formed), and returns
+ the encoded_string decoded first from its Content Transfer Encoding and
+ then from the resulting bytes into unicode using the specified charset. If
+ the cte-decoded string does not successfully decode using the specified
+ character set, a defect is added to the defects list and the unknown octets
+ are replaced by the unicode 'unknown' character \uFFFD.
+
+ The specified charset and language are returned. The default for language,
+ which is rarely if ever encountered, is the empty string.
+
+ """
+ _, charset, cte, cte_string, _ = str(ew).split('?')
+ charset, _, lang = charset.partition('*')
+ cte = cte.lower()
+ # Recover the original bytes and do CTE decoding.
+ bstring = cte_string.encode('ascii', 'surrogateescape')
+ bstring, defects = _cte_decoders[cte](bstring)
+ # Turn the CTE decoded bytes into unicode.
+ try:
+ string = bstring.decode(charset)
+ except UnicodeError:
+ defects.append(errors.UndecodableBytesDefect("Encoded word "
+ "contains bytes not decodable using {} charset".format(charset)))
+ string = bstring.decode(charset, 'surrogateescape')
+ except LookupError:
+ string = bstring.decode('ascii', 'surrogateescape')
+ if charset.lower() != 'unknown-8bit':
+ defects.append(errors.CharsetError("Unknown charset {} "
+ "in encoded word; decoded as unknown bytes".format(charset)))
+ return string, charset, lang, defects
+
+
+_cte_encoders = {
+ 'q': encode_q,
+ 'b': encode_b,
+ }
+
+_cte_encode_length = {
+ 'q': len_q,
+ 'b': len_b,
+ }
+
+def encode(string, charset='utf-8', encoding=None, lang=''):
+ """Encode string using the CTE encoding that produces the shorter result.
+
+ Produces an RFC 2047/2243 encoded word of the form:
+
+ =?charset*lang?cte?encoded_string?=
+
+ where '*lang' is omitted unless the 'lang' parameter is given a value.
+ Optional argument charset (defaults to utf-8) specifies the charset to use
+ to encode the string to binary before CTE encoding it. Optional argument
+ 'encoding' is the cte specifier for the encoding that should be used ('q'
+ or 'b'); if it is None (the default) the encoding which produces the
+ shortest encoded sequence is used, except that 'q' is preferred if it is up
+ to five characters longer. Optional argument 'lang' (default '') gives the
+ RFC 2243 language string to specify in the encoded word.
+
+ """
+ string = str(string)
+ if charset == 'unknown-8bit':
+ bstring = string.encode('ascii', 'surrogateescape')
+ else:
+ bstring = string.encode(charset)
+ if encoding is None:
+ qlen = _cte_encode_length['q'](bstring)
+ blen = _cte_encode_length['b'](bstring)
+ # Bias toward q. 5 is arbitrary.
+ encoding = 'q' if qlen - blen < 5 else 'b'
+ encoded = _cte_encoders[encoding](bstring)
+ if lang:
+ lang = '*' + lang
+ return "=?{0}{1}?{2}?{3}?=".format(charset, lang, encoding, encoded)
diff --git a/src/future/backports/email/_header_value_parser.py b/src/future/backports/email/_header_value_parser.py
new file mode 100644
index 00000000..59b1b318
--- /dev/null
+++ b/src/future/backports/email/_header_value_parser.py
@@ -0,0 +1,2965 @@
+"""Header value parser implementing various email-related RFC parsing rules.
+
+The parsing methods defined in this module implement various email related
+parsing rules. Principal among them is RFC 5322, which is the follow-on
+to RFC 2822 and primarily a clarification of the former. It also implements
+RFC 2047 encoded word decoding.
+
+RFC 5322 goes to considerable trouble to maintain backward compatibility with
+RFC 822 in the parse phase, while cleaning up the structure on the generation
+phase. This parser supports correct RFC 5322 generation by tagging white space
+as folding white space only when folding is allowed in the non-obsolete rule
+sets. Actually, the parser is even more generous when accepting input than RFC
+5322 mandates, following the spirit of Postel's Law, which RFC 5322 encourages.
+Where possible deviations from the standard are annotated on the 'defects'
+attribute of tokens that deviate.
+
+The general structure of the parser follows RFC 5322, and uses its terminology
+where there is a direct correspondence. Where the implementation requires a
+somewhat different structure than that used by the formal grammar, new terms
+that mimic the closest existing terms are used. Thus, it really helps to have
+a copy of RFC 5322 handy when studying this code.
+
+Input to the parser is a string that has already been unfolded according to
+RFC 5322 rules. According to the RFC this unfolding is the very first step, and
+this parser leaves the unfolding step to a higher level message parser, which
+will have already detected the line breaks that need unfolding while
+determining the beginning and end of each header.
+
+The output of the parser is a TokenList object, which is a list subclass. A
+TokenList is a recursive data structure. The terminal nodes of the structure
+are Terminal objects, which are subclasses of str. These do not correspond
+directly to terminal objects in the formal grammar, but are instead more
+practical higher level combinations of true terminals.
+
+All TokenList and Terminal objects have a 'value' attribute, which produces the
+semantically meaningful value of that part of the parse subtree. The value of
+all whitespace tokens (no matter how many sub-tokens they may contain) is a
+single space, as per the RFC rules. This includes 'CFWS', which is herein
+included in the general class of whitespace tokens. There is one exception to
+the rule that whitespace tokens are collapsed into single spaces in values: in
+the value of a 'bare-quoted-string' (a quoted-string with no leading or
+trailing whitespace), any whitespace that appeared between the quotation marks
+is preserved in the returned value. Note that in all Terminal strings quoted
+pairs are turned into their unquoted values.
+
+All TokenList and Terminal objects also have a string value, which attempts to
+be a "canonical" representation of the RFC-compliant form of the substring that
+produced the parsed subtree, including minimal use of quoted pair quoting.
+Whitespace runs are not collapsed.
+
+Comment tokens also have a 'content' attribute providing the string found
+between the parens (including any nested comments) with whitespace preserved.
+
+All TokenList and Terminal objects have a 'defects' attribute which is a
+possibly empty list all of the defects found while creating the token. Defects
+may appear on any token in the tree, and a composite list of all defects in the
+subtree is available through the 'all_defects' attribute of any node. (For
+Terminal notes x.defects == x.all_defects.)
+
+Each object in a parse tree is called a 'token', and each has a 'token_type'
+attribute that gives the name from the RFC 5322 grammar that it represents.
+Not all RFC 5322 nodes are produced, and there is one non-RFC 5322 node that
+may be produced: 'ptext'. A 'ptext' is a string of printable ascii characters.
+It is returned in place of lists of (ctext/quoted-pair) and
+(qtext/quoted-pair).
+
+XXX: provide complete list of token types.
+"""
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import int, range, str, super, list
+
+import re
+from collections import namedtuple, OrderedDict
+
+from future.backports.urllib.parse import (unquote, unquote_to_bytes)
+from future.backports.email import _encoded_words as _ew
+from future.backports.email import errors
+from future.backports.email import utils
+
+#
+# Useful constants and functions
+#
+
+WSP = set(' \t')
+CFWS_LEADER = WSP | set('(')
+SPECIALS = set(r'()<>@,:;.\"[]')
+ATOM_ENDS = SPECIALS | WSP
+DOT_ATOM_ENDS = ATOM_ENDS - set('.')
+# '.', '"', and '(' do not end phrases in order to support obs-phrase
+PHRASE_ENDS = SPECIALS - set('."(')
+TSPECIALS = (SPECIALS | set('/?=')) - set('.')
+TOKEN_ENDS = TSPECIALS | WSP
+ASPECIALS = TSPECIALS | set("*'%")
+ATTRIBUTE_ENDS = ASPECIALS | WSP
+EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%')
+
+def quote_string(value):
+ return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"'
+
+#
+# Accumulator for header folding
+#
+
+class _Folded(object):
+
+ def __init__(self, maxlen, policy):
+ self.maxlen = maxlen
+ self.policy = policy
+ self.lastlen = 0
+ self.stickyspace = None
+ self.firstline = True
+ self.done = []
+ self.current = list() # uses l.clear()
+
+ def newline(self):
+ self.done.extend(self.current)
+ self.done.append(self.policy.linesep)
+ self.current.clear()
+ self.lastlen = 0
+
+ def finalize(self):
+ if self.current:
+ self.newline()
+
+ def __str__(self):
+ return ''.join(self.done)
+
+ def append(self, stoken):
+ self.current.append(stoken)
+
+ def append_if_fits(self, token, stoken=None):
+ if stoken is None:
+ stoken = str(token)
+ l = len(stoken)
+ if self.stickyspace is not None:
+ stickyspace_len = len(self.stickyspace)
+ if self.lastlen + stickyspace_len + l <= self.maxlen:
+ self.current.append(self.stickyspace)
+ self.lastlen += stickyspace_len
+ self.current.append(stoken)
+ self.lastlen += l
+ self.stickyspace = None
+ self.firstline = False
+ return True
+ if token.has_fws:
+ ws = token.pop_leading_fws()
+ if ws is not None:
+ self.stickyspace += str(ws)
+ stickyspace_len += len(ws)
+ token._fold(self)
+ return True
+ if stickyspace_len and l + 1 <= self.maxlen:
+ margin = self.maxlen - l
+ if 0 < margin < stickyspace_len:
+ trim = stickyspace_len - margin
+ self.current.append(self.stickyspace[:trim])
+ self.stickyspace = self.stickyspace[trim:]
+ stickyspace_len = trim
+ self.newline()
+ self.current.append(self.stickyspace)
+ self.current.append(stoken)
+ self.lastlen = l + stickyspace_len
+ self.stickyspace = None
+ self.firstline = False
+ return True
+ if not self.firstline:
+ self.newline()
+ self.current.append(self.stickyspace)
+ self.current.append(stoken)
+ self.stickyspace = None
+ self.firstline = False
+ return True
+ if self.lastlen + l <= self.maxlen:
+ self.current.append(stoken)
+ self.lastlen += l
+ return True
+ if l < self.maxlen:
+ self.newline()
+ self.current.append(stoken)
+ self.lastlen = l
+ return True
+ return False
+
+#
+# TokenList and its subclasses
+#
+
+class TokenList(list):
+
+ token_type = None
+
+ def __init__(self, *args, **kw):
+ super(TokenList, self).__init__(*args, **kw)
+ self.defects = []
+
+ def __str__(self):
+ return ''.join(str(x) for x in self)
+
+ def __repr__(self):
+ return '{}({})'.format(self.__class__.__name__,
+ super(TokenList, self).__repr__())
+
+ @property
+ def value(self):
+ return ''.join(x.value for x in self if x.value)
+
+ @property
+ def all_defects(self):
+ return sum((x.all_defects for x in self), self.defects)
+
+ #
+ # Folding API
+ #
+ # parts():
+ #
+ # return a list of objects that constitute the "higher level syntactic
+ # objects" specified by the RFC as the best places to fold a header line.
+ # The returned objects must include leading folding white space, even if
+ # this means mutating the underlying parse tree of the object. Each object
+ # is only responsible for returning *its* parts, and should not drill down
+ # to any lower level except as required to meet the leading folding white
+ # space constraint.
+ #
+ # _fold(folded):
+ #
+ # folded: the result accumulator. This is an instance of _Folded.
+ # (XXX: I haven't finished factoring this out yet, the folding code
+ # pretty much uses this as a state object.) When the folded.current
+ # contains as much text as will fit, the _fold method should call
+ # folded.newline.
+ # folded.lastlen: the current length of the test stored in folded.current.
+ # folded.maxlen: The maximum number of characters that may appear on a
+ # folded line. Differs from the policy setting in that "no limit" is
+ # represented by +inf, which means it can be used in the trivially
+ # logical fashion in comparisons.
+ #
+ # Currently no subclasses implement parts, and I think this will remain
+ # true. A subclass only needs to implement _fold when the generic version
+ # isn't sufficient. _fold will need to be implemented primarily when it is
+ # possible for encoded words to appear in the specialized token-list, since
+ # there is no generic algorithm that can know where exactly the encoded
+ # words are allowed. A _fold implementation is responsible for filling
+ # lines in the same general way that the top level _fold does. It may, and
+ # should, call the _fold method of sub-objects in a similar fashion to that
+ # of the top level _fold.
+ #
+ # XXX: I'm hoping it will be possible to factor the existing code further
+ # to reduce redundancy and make the logic clearer.
+
+ @property
+ def parts(self):
+ klass = self.__class__
+ this = list()
+ for token in self:
+ if token.startswith_fws():
+ if this:
+ yield this[0] if len(this)==1 else klass(this)
+ this.clear()
+ end_ws = token.pop_trailing_ws()
+ this.append(token)
+ if end_ws:
+ yield klass(this)
+ this = [end_ws]
+ if this:
+ yield this[0] if len(this)==1 else klass(this)
+
+ def startswith_fws(self):
+ return self[0].startswith_fws()
+
+ def pop_leading_fws(self):
+ if self[0].token_type == 'fws':
+ return self.pop(0)
+ return self[0].pop_leading_fws()
+
+ def pop_trailing_ws(self):
+ if self[-1].token_type == 'cfws':
+ return self.pop(-1)
+ return self[-1].pop_trailing_ws()
+
+ @property
+ def has_fws(self):
+ for part in self:
+ if part.has_fws:
+ return True
+ return False
+
+ def has_leading_comment(self):
+ return self[0].has_leading_comment()
+
+ @property
+ def comments(self):
+ comments = []
+ for token in self:
+ comments.extend(token.comments)
+ return comments
+
+ def fold(self, **_3to2kwargs):
+ # max_line_length 0/None means no limit, ie: infinitely long.
+ policy = _3to2kwargs['policy']; del _3to2kwargs['policy']
+ maxlen = policy.max_line_length or float("+inf")
+ folded = _Folded(maxlen, policy)
+ self._fold(folded)
+ folded.finalize()
+ return str(folded)
+
+ def as_encoded_word(self, charset):
+ # This works only for things returned by 'parts', which include
+ # the leading fws, if any, that should be used.
+ res = []
+ ws = self.pop_leading_fws()
+ if ws:
+ res.append(ws)
+ trailer = self.pop(-1) if self[-1].token_type=='fws' else ''
+ res.append(_ew.encode(str(self), charset))
+ res.append(trailer)
+ return ''.join(res)
+
+ def cte_encode(self, charset, policy):
+ res = []
+ for part in self:
+ res.append(part.cte_encode(charset, policy))
+ return ''.join(res)
+
+ def _fold(self, folded):
+ for part in self.parts:
+ tstr = str(part)
+ tlen = len(tstr)
+ try:
+ str(part).encode('us-ascii')
+ except UnicodeEncodeError:
+ if any(isinstance(x, errors.UndecodableBytesDefect)
+ for x in part.all_defects):
+ charset = 'unknown-8bit'
+ else:
+ # XXX: this should be a policy setting
+ charset = 'utf-8'
+ tstr = part.cte_encode(charset, folded.policy)
+ tlen = len(tstr)
+ if folded.append_if_fits(part, tstr):
+ continue
+ # Peel off the leading whitespace if any and make it sticky, to
+ # avoid infinite recursion.
+ ws = part.pop_leading_fws()
+ if ws is not None:
+ # Peel off the leading whitespace and make it sticky, to
+ # avoid infinite recursion.
+ folded.stickyspace = str(part.pop(0))
+ if folded.append_if_fits(part):
+ continue
+ if part.has_fws:
+ part._fold(folded)
+ continue
+ # There are no fold points in this one; it is too long for a single
+ # line and can't be split...we just have to put it on its own line.
+ folded.append(tstr)
+ folded.newline()
+
+ def pprint(self, indent=''):
+ print('\n'.join(self._pp(indent='')))
+
+ def ppstr(self, indent=''):
+ return '\n'.join(self._pp(indent=''))
+
+ def _pp(self, indent=''):
+ yield '{}{}/{}('.format(
+ indent,
+ self.__class__.__name__,
+ self.token_type)
+ for token in self:
+ if not hasattr(token, '_pp'):
+ yield (indent + ' !! invalid element in token '
+ 'list: {!r}'.format(token))
+ else:
+ for line in token._pp(indent+' '):
+ yield line
+ if self.defects:
+ extra = ' Defects: {}'.format(self.defects)
+ else:
+ extra = ''
+ yield '{}){}'.format(indent, extra)
+
+
+class WhiteSpaceTokenList(TokenList):
+
+ @property
+ def value(self):
+ return ' '
+
+ @property
+ def comments(self):
+ return [x.content for x in self if x.token_type=='comment']
+
+
+class UnstructuredTokenList(TokenList):
+
+ token_type = 'unstructured'
+
+ def _fold(self, folded):
+ if any(x.token_type=='encoded-word' for x in self):
+ return self._fold_encoded(folded)
+ # Here we can have either a pure ASCII string that may or may not
+ # have surrogateescape encoded bytes, or a unicode string.
+ last_ew = None
+ for part in self.parts:
+ tstr = str(part)
+ is_ew = False
+ try:
+ str(part).encode('us-ascii')
+ except UnicodeEncodeError:
+ if any(isinstance(x, errors.UndecodableBytesDefect)
+ for x in part.all_defects):
+ charset = 'unknown-8bit'
+ else:
+ charset = 'utf-8'
+ if last_ew is not None:
+ # We've already done an EW, combine this one with it
+ # if there's room.
+ chunk = get_unstructured(
+ ''.join(folded.current[last_ew:]+[tstr])).as_encoded_word(charset)
+ oldlastlen = sum(len(x) for x in folded.current[:last_ew])
+ schunk = str(chunk)
+ lchunk = len(schunk)
+ if oldlastlen + lchunk <= folded.maxlen:
+ del folded.current[last_ew:]
+ folded.append(schunk)
+ folded.lastlen = oldlastlen + lchunk
+ continue
+ tstr = part.as_encoded_word(charset)
+ is_ew = True
+ if folded.append_if_fits(part, tstr):
+ if is_ew:
+ last_ew = len(folded.current) - 1
+ continue
+ if is_ew or last_ew:
+ # It's too big to fit on the line, but since we've
+ # got encoded words we can use encoded word folding.
+ part._fold_as_ew(folded)
+ continue
+ # Peel off the leading whitespace if any and make it sticky, to
+ # avoid infinite recursion.
+ ws = part.pop_leading_fws()
+ if ws is not None:
+ folded.stickyspace = str(ws)
+ if folded.append_if_fits(part):
+ continue
+ if part.has_fws:
+ part.fold(folded)
+ continue
+ # It can't be split...we just have to put it on its own line.
+ folded.append(tstr)
+ folded.newline()
+ last_ew = None
+
+ def cte_encode(self, charset, policy):
+ res = []
+ last_ew = None
+ for part in self:
+ spart = str(part)
+ try:
+ spart.encode('us-ascii')
+ res.append(spart)
+ except UnicodeEncodeError:
+ if last_ew is None:
+ res.append(part.cte_encode(charset, policy))
+ last_ew = len(res)
+ else:
+ tl = get_unstructured(''.join(res[last_ew:] + [spart]))
+ res.append(tl.as_encoded_word())
+ return ''.join(res)
+
+
+class Phrase(TokenList):
+
+ token_type = 'phrase'
+
+ def _fold(self, folded):
+ # As with Unstructured, we can have pure ASCII with or without
+ # surrogateescape encoded bytes, or we could have unicode. But this
+ # case is more complicated, since we have to deal with the various
+ # sub-token types and how they can be composed in the face of
+ # unicode-that-needs-CTE-encoding, and the fact that if a token has a
+ # comment that becomes a barrier across which we can't compose encoded
+ # words.
+ last_ew = None
+ for part in self.parts:
+ tstr = str(part)
+ tlen = len(tstr)
+ has_ew = False
+ try:
+ str(part).encode('us-ascii')
+ except UnicodeEncodeError:
+ if any(isinstance(x, errors.UndecodableBytesDefect)
+ for x in part.all_defects):
+ charset = 'unknown-8bit'
+ else:
+ charset = 'utf-8'
+ if last_ew is not None and not part.has_leading_comment():
+ # We've already done an EW, let's see if we can combine
+ # this one with it. The last_ew logic ensures that all we
+ # have at this point is atoms, no comments or quoted
+ # strings. So we can treat the text between the last
+ # encoded word and the content of this token as
+ # unstructured text, and things will work correctly. But
+ # we have to strip off any trailing comment on this token
+ # first, and if it is a quoted string we have to pull out
+ # the content (we're encoding it, so it no longer needs to
+ # be quoted).
+ if part[-1].token_type == 'cfws' and part.comments:
+ remainder = part.pop(-1)
+ else:
+ remainder = ''
+ for i, token in enumerate(part):
+ if token.token_type == 'bare-quoted-string':
+ part[i] = UnstructuredTokenList(token[:])
+ chunk = get_unstructured(
+ ''.join(folded.current[last_ew:]+[tstr])).as_encoded_word(charset)
+ schunk = str(chunk)
+ lchunk = len(schunk)
+ if last_ew + lchunk <= folded.maxlen:
+ del folded.current[last_ew:]
+ folded.append(schunk)
+ folded.lastlen = sum(len(x) for x in folded.current)
+ continue
+ tstr = part.as_encoded_word(charset)
+ tlen = len(tstr)
+ has_ew = True
+ if folded.append_if_fits(part, tstr):
+ if has_ew and not part.comments:
+ last_ew = len(folded.current) - 1
+ elif part.comments or part.token_type == 'quoted-string':
+ # If a comment is involved we can't combine EWs. And if a
+ # quoted string is involved, it's not worth the effort to
+ # try to combine them.
+ last_ew = None
+ continue
+ part._fold(folded)
+
+ def cte_encode(self, charset, policy):
+ res = []
+ last_ew = None
+ is_ew = False
+ for part in self:
+ spart = str(part)
+ try:
+ spart.encode('us-ascii')
+ res.append(spart)
+ except UnicodeEncodeError:
+ is_ew = True
+ if last_ew is None:
+ if not part.comments:
+ last_ew = len(res)
+ res.append(part.cte_encode(charset, policy))
+ elif not part.has_leading_comment():
+ if part[-1].token_type == 'cfws' and part.comments:
+ remainder = part.pop(-1)
+ else:
+ remainder = ''
+ for i, token in enumerate(part):
+ if token.token_type == 'bare-quoted-string':
+ part[i] = UnstructuredTokenList(token[:])
+ tl = get_unstructured(''.join(res[last_ew:] + [spart]))
+ res[last_ew:] = [tl.as_encoded_word(charset)]
+ if part.comments or (not is_ew and part.token_type == 'quoted-string'):
+ last_ew = None
+ return ''.join(res)
+
class Word(TokenList):
    # The 'word' level of the grammar.  Normally elided from the parse
    # tree: get_word returns an Atom or QuotedString directly.

    token_type = 'word'


class CFWSList(WhiteSpaceTokenList):
    # A run of folding white space and/or comments (RFC 5322 CFWS).

    token_type = 'cfws'

    def has_leading_comment(self):
        """Return True if this CFWS run contains any comment."""
        return bool(self.comments)


class Atom(TokenList):
    # [CFWS] 1*atext [CFWS]

    token_type = 'atom'


class Token(TokenList):
    # An RFC 2045 'token' (used in MIME parameter values).

    token_type = 'token'
+
+
class EncodedWord(TokenList):
    """A decoded RFC 2047 encoded word; children are the decoded tokens."""

    token_type = 'encoded-word'
    cte = None       # original raw encoded-word source text, if known
    charset = None   # charset named in the encoded word
    lang = None      # optional RFC 2231 language tag

    @property
    def encoded(self):
        """The RFC 2047 encoded form of this token.

        Returns the original source text when it was recorded (self.cte),
        otherwise re-encodes the decoded content using self.charset.
        """
        if self.cte is not None:
            return self.cte
        # Bug fix: the computed encoding was previously discarded (missing
        # 'return'), so the property silently yielded None on this path.
        return _ew.encode(str(self), self.charset)
+
+
+
class QuotedString(TokenList):
    """A quoted-string together with any surrounding CFWS."""

    token_type = 'quoted-string'

    @property
    def content(self):
        # Unquoted text of the bare-quoted-string child (None if absent).
        return next((tok.value for tok in self
                     if tok.token_type == 'bare-quoted-string'), None)

    @property
    def quoted_value(self):
        # Full value, but with the quoted portion left in quoted form.
        return ''.join(str(tok) if tok.token_type == 'bare-quoted-string'
                       else tok.value
                       for tok in self)

    @property
    def stripped_value(self):
        # Same result as content: the unquoted bare-quoted-string text.
        return next((tok.value for tok in self
                     if tok.token_type == 'bare-quoted-string'), None)
+
+
class BareQuotedString(QuotedString):
    """A quoted-string without its surrounding CFWS.

    str() re-quotes the content; value is the unquoted text.
    """

    token_type = 'bare-quoted-string'

    def __str__(self):
        return quote_string(''.join(str(x) for x in self))

    @property
    def value(self):
        return ''.join(str(x) for x in self)
+
+
class Comment(WhiteSpaceTokenList):
    """An RFC 5322 comment; semantically white space."""

    token_type = 'comment'

    def __str__(self):
        # Render as a parenthesized comment with inner specials quoted.
        return ''.join(sum([
            ["("],
            [self.quote(x) for x in self],
            [")"],
            ], []))

    def quote(self, value):
        """Return value as a string with backslash and parens quoted.

        Nested comments render themselves recursively and are not
        re-quoted here.
        """
        if value.token_type == 'comment':
            return str(value)
        # Fix: use properly doubled backslashes.  The originals '\(' and
        # '\)' are invalid escape sequences that only produce the intended
        # characters by accident (DeprecationWarning since Python 3.6,
        # SyntaxWarning later).  The resulting strings are identical.
        return str(value).replace('\\', '\\\\').replace(
            '(', '\\(').replace(
            ')', '\\)')

    @property
    def content(self):
        # The comment text without the surrounding parentheses.
        return ''.join(str(x) for x in self)

    @property
    def comments(self):
        return [self.content]
+
class AddressList(TokenList):
    """A comma-separated list of addresses (mailboxes and/or groups)."""

    token_type = 'address-list'

    @property
    def addresses(self):
        # Only the address tokens (separators etc. are skipped).
        return [x for x in self if x.token_type=='address']

    @property
    def mailboxes(self):
        # Flattened list of the valid mailboxes of every address.
        return sum((x.mailboxes
                    for x in self if x.token_type=='address'), [])

    @property
    def all_mailboxes(self):
        # Like mailboxes, but also including invalid-mailbox tokens.
        return sum((x.all_mailboxes
                    for x in self if x.token_type=='address'), [])


class Address(TokenList):
    """A single address: either a mailbox or a group."""

    token_type = 'address'

    @property
    def display_name(self):
        # Only a group has a display name at the address level.
        if self[0].token_type == 'group':
            return self[0].display_name

    @property
    def mailboxes(self):
        if self[0].token_type == 'mailbox':
            return [self[0]]
        elif self[0].token_type == 'invalid-mailbox':
            return []
        # Otherwise the first child is a group; delegate to it.
        return self[0].mailboxes

    @property
    def all_mailboxes(self):
        if self[0].token_type == 'mailbox':
            return [self[0]]
        elif self[0].token_type == 'invalid-mailbox':
            return [self[0]]
        return self[0].all_mailboxes


class MailboxList(TokenList):
    """A comma-separated list of mailboxes."""

    token_type = 'mailbox-list'

    @property
    def mailboxes(self):
        return [x for x in self if x.token_type=='mailbox']

    @property
    def all_mailboxes(self):
        return [x for x in self
                if x.token_type in ('mailbox', 'invalid-mailbox')]


class GroupList(TokenList):
    """The contents of a group (the part between ':' and ';')."""

    token_type = 'group-list'

    @property
    def mailboxes(self):
        if not self or self[0].token_type != 'mailbox-list':
            return []
        return self[0].mailboxes

    @property
    def all_mailboxes(self):
        if not self or self[0].token_type != 'mailbox-list':
            return []
        return self[0].all_mailboxes


class Group(TokenList):
    """display-name ':' [group-list] ';' [CFWS]"""

    token_type = "group"

    @property
    def mailboxes(self):
        # Index 2 is the group-list when the group is non-empty; for an
        # empty group ("name:;") it is the ';' terminal instead.
        # NOTE(review): assumes the parse shape above -- the producing
        # get_group function is not in view; confirm there.
        if self[2].token_type != 'group-list':
            return []
        return self[2].mailboxes

    @property
    def all_mailboxes(self):
        if self[2].token_type != 'group-list':
            return []
        return self[2].all_mailboxes

    @property
    def display_name(self):
        return self[0].display_name
+
+
class NameAddr(TokenList):
    """[display-name] angle-addr"""

    token_type = 'name-addr'

    @property
    def display_name(self):
        # With only one child there is no display-name, just the
        # angle-addr.
        if len(self) == 1:
            return None
        return self[0].display_name

    @property
    def local_part(self):
        return self[-1].local_part

    @property
    def domain(self):
        return self[-1].domain

    @property
    def route(self):
        return self[-1].route

    @property
    def addr_spec(self):
        return self[-1].addr_spec


class AngleAddr(TokenList):
    """[CFWS] '<' [obs-route] addr-spec '>' [CFWS]"""

    token_type = 'angle-addr'

    @property
    def local_part(self):
        for x in self:
            if x.token_type == 'addr-spec':
                return x.local_part

    @property
    def domain(self):
        for x in self:
            if x.token_type == 'addr-spec':
                return x.domain

    @property
    def route(self):
        for x in self:
            if x.token_type == 'obs-route':
                return x.domains

    @property
    def addr_spec(self):
        for x in self:
            if x.token_type == 'addr-spec':
                return x.addr_spec
        else:
            # No addr-spec child: this is the null address '<>'.
            return '<>'


class ObsRoute(TokenList):
    """Obsolete route syntax: '@' domain *(',' '@' domain) ':'"""

    token_type = 'obs-route'

    @property
    def domains(self):
        return [x.domain for x in self if x.token_type == 'domain']
+
+
class Mailbox(TokenList):
    """name-addr / addr-spec"""

    token_type = 'mailbox'

    @property
    def display_name(self):
        # Only the name-addr form carries a display name.
        if self[0].token_type == 'name-addr':
            return self[0].display_name

    @property
    def local_part(self):
        return self[0].local_part

    @property
    def domain(self):
        return self[0].domain

    @property
    def route(self):
        if self[0].token_type == 'name-addr':
            return self[0].route

    @property
    def addr_spec(self):
        return self[0].addr_spec


class InvalidMailbox(TokenList):
    """A mailbox that could not be parsed; all accessors return None."""

    token_type = 'invalid-mailbox'

    @property
    def display_name(self):
        return None

    # All the usual mailbox accessors are None for an invalid mailbox.
    local_part = domain = route = addr_spec = display_name


class Domain(TokenList):
    """The domain part of an addr-spec."""

    token_type = 'domain'

    @property
    def domain(self):
        # White space (folding) inside the domain is dropped.
        return ''.join(super(Domain, self).value.split())


class DotAtom(TokenList):
    """[CFWS] dot-atom-text [CFWS]"""

    token_type = 'dot-atom'


class DotAtomText(TokenList):
    """1*atext *('.' 1*atext)"""

    token_type = 'dot-atom-text'
+
+
class AddrSpec(TokenList):
    """local-part '@' domain"""

    token_type = 'addr-spec'

    @property
    def local_part(self):
        return self[0].local_part

    @property
    def domain(self):
        # Fewer than three children means there was no '@' domain part.
        if len(self) < 3:
            return None
        return self[-1].domain

    @property
    def value(self):
        if len(self) < 3:
            return self[0].value
        # Drop white space around the '@'.
        return self[0].value.rstrip()+self[1].value+self[2].value.lstrip()

    @property
    def addr_spec(self):
        # Re-quote the local part if it contains any character that
        # cannot appear in a dot-atom.
        nameset = set(self.local_part)
        if len(nameset) > len(nameset-DOT_ATOM_ENDS):
            lp = quote_string(self.local_part)
        else:
            lp = self.local_part
        if self.domain is not None:
            return lp + '@' + self.domain
        return lp


class ObsLocalPart(TokenList):
    """Obsolete local-part syntax: word *('.' word)"""

    token_type = 'obs-local-part'
+
+
class DisplayName(Phrase):
    """The display-name of a name-addr or group."""

    token_type = 'display-name'

    @property
    def display_name(self):
        # The display name with any leading/trailing CFWS stripped.
        res = TokenList(self)
        if res[0].token_type == 'cfws':
            res.pop(0)
        else:
            if res[0][0].token_type == 'cfws':
                res[0] = TokenList(res[0][1:])
        if res[-1].token_type == 'cfws':
            res.pop()
        else:
            if res[-1][-1].token_type == 'cfws':
                res[-1] = TokenList(res[-1][:-1])
        return res.value

    @property
    def value(self):
        # Quote the whole name if it had defects or contained a
        # quoted-string; otherwise render normally.
        quote = False
        if self.defects:
            quote = True
        else:
            for x in self:
                if x.token_type == 'quoted-string':
                    quote = True
        if quote:
            # Preserve a single space where CFWS bordered the name.
            pre = post = ''
            if self[0].token_type=='cfws' or self[0][0].token_type=='cfws':
                pre = ' '
            if self[-1].token_type=='cfws' or self[-1][-1].token_type=='cfws':
                post = ' '
            return pre+quote_string(self.display_name)+post
        else:
            return super(DisplayName, self).value


class LocalPart(TokenList):
    """The local-part of an addr-spec."""

    token_type = 'local-part'

    @property
    def value(self):
        if self[0].token_type == "quoted-string":
            return self[0].quoted_value
        else:
            return self[0].value

    @property
    def local_part(self):
        # Strip whitespace from front, back, and around dots.
        # Sentinel DOTs at both ends simplify the edge handling; they are
        # sliced back off before returning.
        res = [DOT]
        last = DOT
        last_is_tl = False
        for tok in self[0] + [DOT]:
            if tok.token_type == 'cfws':
                continue
            if (last_is_tl and tok.token_type == 'dot' and
                    last[-1].token_type == 'cfws'):
                res[-1] = TokenList(last[:-1])
            is_tl = isinstance(tok, TokenList)
            if (is_tl and last.token_type == 'dot' and
                    tok[0].token_type == 'cfws'):
                res.append(TokenList(tok[1:]))
            else:
                res.append(tok)
            last = res[-1]
            last_is_tl = is_tl
        res = TokenList(res[1:-1])
        return res.value
+
+
class DomainLiteral(TokenList):
    """'[' ptext ']' -- a literal (usually IP) domain."""

    token_type = 'domain-literal'

    @property
    def domain(self):
        # Folding white space inside the literal is dropped.
        return ''.join(super(DomainLiteral, self).value.split())

    @property
    def ip(self):
        # The text inside the brackets.
        for x in self:
            if x.token_type == 'ptext':
                return x.value


class MIMEVersion(TokenList):
    """A MIME-Version header value."""

    token_type = 'mime-version'
    major = None   # int major version, set by the parser
    minor = None   # int minor version, set by the parser
+
+
class Parameter(TokenList):
    """A single MIME parameter (attribute [*section] ['*'] '=' value)."""

    token_type = 'parameter'
    sectioned = False       # True if this is an RFC 2231 numbered section
    extended = False        # True if this is an RFC 2231 extended value
    charset = 'us-ascii'    # charset declared by an extended value

    @property
    def section_number(self):
        # Because the first token, the attribute (name) eats CFWS, the second
        # token is always the section if there is one.
        return self[1].number if self.sectioned else 0

    @property
    def param_value(self):
        # This is part of the "handle quoted extended parameters" hack.
        # Search for a value token, possibly nested inside a quoted-string.
        for token in self:
            if token.token_type == 'value':
                return token.stripped_value
            if token.token_type == 'quoted-string':
                for token in token:
                    if token.token_type == 'bare-quoted-string':
                        for token in token:
                            if token.token_type == 'value':
                                return token.stripped_value
        return ''


class InvalidParameter(Parameter):
    """A parameter that could not be parsed."""

    token_type = 'invalid-parameter'


class Attribute(TokenList):
    """A parameter name."""

    token_type = 'attribute'

    @property
    def stripped_value(self):
        # First attrtext (or extended-attrtext) child, without CFWS.
        for token in self:
            if token.token_type.endswith('attrtext'):
                return token.value


class Section(TokenList):
    """An RFC 2231 section marker ('*' number)."""

    token_type = 'section'
    number = None   # int section number, set by the parser


class Value(TokenList):
    """A parameter value."""

    token_type = 'value'

    @property
    def stripped_value(self):
        token = self[0]
        if token.token_type == 'cfws':
            token = self[1]
        if token.token_type.endswith(
                ('quoted-string', 'attribute', 'extended-attribute')):
            return token.stripped_value
        return self.value
+
+
class MimeParameters(TokenList):
    """The full parameter list of a parameterized MIME header."""

    token_type = 'mime-parameters'

    @property
    def params(self):
        # The RFC specifically states that the ordering of parameters is not
        # guaranteed and may be reordered by the transport layer.  So we have
        # to assume the RFC 2231 pieces can come in any order.  However, we
        # output them in the order that we first see a given name, which gives
        # us a stable __str__.
        params = OrderedDict()
        for token in self:
            if not token.token_type.endswith('parameter'):
                continue
            if token[0].token_type != 'attribute':
                continue
            name = token[0].value.strip()
            if name not in params:
                params[name] = []
            params[name].append((token.section_number, token))
        for name, parts in params.items():
            # Sort the RFC 2231 sections into order by section number.
            parts = sorted(parts)
            # XXX: there might be more recovery we could do here if, for
            # example, this is really a case of a duplicate attribute name.
            value_parts = []
            charset = parts[0][1].charset
            for i, (section_number, param) in enumerate(parts):
                if section_number != i:
                    param.defects.append(errors.InvalidHeaderDefect(
                        "inconsistent multipart parameter numbering"))
                value = param.param_value
                if param.extended:
                    try:
                        value = unquote_to_bytes(value)
                    except UnicodeEncodeError:
                        # source had surrogate escaped bytes.  What we do now
                        # is a bit of an open question.  I'm not sure this is
                        # the best choice, but it is what the old algorithm did
                        value = unquote(value, encoding='latin-1')
                    else:
                        try:
                            value = value.decode(charset, 'surrogateescape')
                        except LookupError:
                            # XXX: there should really be a custom defect for
                            # unknown character set to make it easy to find,
                            # because otherwise unknown charset is a silent
                            # failure.
                            value = value.decode('us-ascii', 'surrogateescape')
                    if utils._has_surrogates(value):
                        param.defects.append(errors.UndecodableBytesDefect())
                value_parts.append(value)
            value = ''.join(value_parts)
            yield name, value

    def __str__(self):
        params = []
        for name, value in self.params:
            if value:
                params.append('{}={}'.format(name, quote_string(value)))
            else:
                params.append(name)
        params = '; '.join(params)
        return ' ' + params if params else ''
+
+
class ParameterizedHeaderValue(TokenList):
    """Base class for header values that carry MIME parameters."""

    @property
    def params(self):
        # The mime-parameters token is at (or near) the end; search
        # backward for it.
        for token in reversed(self):
            if token.token_type == 'mime-parameters':
                return token.params
        return {}

    @property
    def parts(self):
        if self and self[-1].token_type == 'mime-parameters':
            # We don't want to start a new line if all of the params don't fit
            # after the value, so unwrap the parameter list.
            return TokenList(self[:-1] + self[-1])
        return TokenList(self).parts


class ContentType(ParameterizedHeaderValue):
    """A Content-Type header value."""

    token_type = 'content-type'
    maintype = 'text'    # overwritten by the parser with the real maintype
    subtype = 'plain'    # overwritten by the parser with the real subtype


class ContentDisposition(ParameterizedHeaderValue):
    """A Content-Disposition header value."""

    token_type = 'content-disposition'
    content_disposition = None   # set by the parser


class ContentTransferEncoding(TokenList):
    """A Content-Transfer-Encoding header value."""

    token_type = 'content-transfer-encoding'
    cte = '7bit'   # overwritten by the parser with the real encoding


class HeaderLabel(TokenList):
    """The header name plus the ':' separator."""

    token_type = 'header-label'
+
+
class Header(TokenList):
    """A complete header: header-label [cfws] value."""

    token_type = 'header'

    def _fold(self, folded):
        # Emit the header label first; it always starts the first line.
        folded.append(str(self.pop(0)))
        folded.lastlen = len(folded.current[0])
        # The first line of the header is different from all others: we don't
        # want to start a new object on a new line if it has any fold points in
        # it that would allow part of it to be on the first header line.
        # Further, if the first fold point would fit on the new line, we want
        # to do that, but if it doesn't we want to put it on the first line.
        # Folded supports this via the stickyspace attribute.  If this
        # attribute is not None, it does the special handling.
        folded.stickyspace = str(self.pop(0)) if self[0].token_type == 'cfws' else ''
        rest = self.pop(0)
        if self:
            raise ValueError("Malformed Header token list")
        rest._fold(folded)
+
+
+#
+# Terminal classes and instances
+#
+
class Terminal(str):
    """A leaf of the parse tree: a string carrying a token_type and defects."""

    def __new__(cls, value, token_type):
        inst = str.__new__(cls, value)
        inst.token_type = token_type
        inst.defects = []
        return inst

    def __repr__(self):
        return "{}({})".format(type(self).__name__, str.__repr__(self))

    @property
    def all_defects(self):
        # A fresh list, so callers cannot mutate our defect list.
        return list(self.defects)

    def _pp(self, indent=''):
        # One-line pretty-print entry, matching TokenList._pp's format.
        defect_note = ' {}'.format(self.defects) if self.defects else ''
        return ["{}{}/{}({}){}".format(
            indent,
            type(self).__name__,
            self.token_type,
            str.__repr__(self),
            defect_note,
        )]

    def cte_encode(self, charset, policy):
        """Return the value, RFC 2047 encoded if it is not pure ASCII."""
        text = str(self)
        try:
            text.encode('us-ascii')
        except UnicodeEncodeError:
            return _ew.encode(text, charset)
        return text

    def pop_trailing_ws(self):
        # Terminals have no children; this ends the recursion.
        return None

    def pop_leading_fws(self):
        # Terminals have no children; this ends the recursion.
        return None

    @property
    def comments(self):
        return []

    def has_leading_comment(self):
        return False

    def __getnewargs__(self):
        # Support copy/pickle despite the extra __new__ arguments.
        return (str(self), self.token_type)
+
+
class WhiteSpaceTerminal(Terminal):
    # Folding white space; its semantic value is a single space.

    @property
    def value(self):
        return ' '

    def startswith_fws(self):
        return True

    has_fws = True


class ValueTerminal(Terminal):
    # A run of printable (non-white-space) characters.

    @property
    def value(self):
        return self

    def startswith_fws(self):
        return False

    has_fws = False

    def as_encoded_word(self, charset):
        """Return this terminal as an RFC 2047 encoded word."""
        return _ew.encode(str(self), charset)


class EWWhiteSpaceTerminal(WhiteSpaceTerminal):
    # White space between two adjacent encoded words.  Per RFC 2047 such
    # white space is not semantically significant, so it renders as ''.

    @property
    def value(self):
        return ''

    @property
    def encoded(self):
        return self[:]

    def __str__(self):
        return ''

    has_fws = True
+
+
# XXX these need to become classes and used as instances so
# that a program can't change them in a parse tree and screw
# up other parse trees.  Maybe should have tests for that, too.
DOT = ValueTerminal('.', 'dot')
ListSeparator = ValueTerminal(',', 'list-separator')
RouteComponentMarker = ValueTerminal('@', 'route-component-marker')

#
# Parser
#

# NOTE: the string below is an expression statement, not the module
# docstring; it serves as a block of documentation for the parser section.
"""Parse strings according to RFC822/2047/2822/5322 rules.

This is a stateless parser.  Each get_XXX function accepts a string and
returns either a Terminal or a TokenList representing the RFC object named
by the method and a string containing the remaining unparsed characters
from the input.  Thus a parser method consumes the next syntactic construct
of a given type and returns a token representing the construct plus the
unparsed remainder of the input string.

For example, if the first element of a structured header is a 'phrase',
then:

    phrase, value = get_phrase(value)

returns the complete phrase from the start of the string value, plus any
characters left in the string after the phrase is removed.

"""

# Matchers for runs of characters up to (but excluding) the various
# terminator sets.  NOTE(review): the '\]' below is an invalid escape
# sequence that works only by accident (the regex needs a quoted ']'
# inside a character class); left byte-identical here.
_wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split
_non_atom_end_matcher = re.compile(r"[^{}]+".format(
    ''.join(ATOM_ENDS).replace('\\','\\\\').replace(']','\]'))).match
_non_printable_finder = re.compile(r"[\x00-\x20\x7F]").findall
_non_token_end_matcher = re.compile(r"[^{}]+".format(
    ''.join(TOKEN_ENDS).replace('\\','\\\\').replace(']','\]'))).match
_non_attribute_end_matcher = re.compile(r"[^{}]+".format(
    ''.join(ATTRIBUTE_ENDS).replace('\\','\\\\').replace(']','\]'))).match
_non_extended_attribute_end_matcher = re.compile(r"[^{}]+".format(
    ''.join(EXTENDED_ATTRIBUTE_ENDS).replace(
        '\\','\\\\').replace(']','\]'))).match
+
def _validate_xtext(xtext):
    """If input token contains ASCII non-printables, register a defect."""

    non_printables = _non_printable_finder(xtext)
    if non_printables:
        xtext.defects.append(errors.NonPrintableDefect(non_printables))
    if utils._has_surrogates(xtext):
        xtext.defects.append(errors.UndecodableBytesDefect(
            "Non-ASCII characters found in header token"))

def _get_ptext_to_endchars(value, endchars):
    """Scan printables/quoted-pairs until endchars and return unquoted ptext.

    This function turns a run of qcontent, ccontent-without-comments, or
    dtext-with-quoted-printables into a single string by unquoting any
    quoted printables.  It returns the string, the remaining value, and
    a flag that is True iff there were any quoted printables decoded.

    """
    # 3to2 artifact: equivalent to ``fragment, *remainder = _wsp_splitter(...)``.
    _3to2list = list(_wsp_splitter(value, 1))
    fragment, remainder, = _3to2list[:1] + [_3to2list[1:]]
    vchars = []
    escape = False
    had_qp = False
    for pos in range(len(fragment)):
        if fragment[pos] == '\\':
            if escape:
                escape = False
                had_qp = True
            else:
                escape = True
                continue
        if escape:
            escape = False
        elif fragment[pos] in endchars:
            break
        vchars.append(fragment[pos])
    else:
        # Loop ran to completion: consume the whole fragment.
        # NOTE(review): if fragment is empty, pos is unbound here and this
        # raises NameError -- callers appear to guarantee a non-empty,
        # non-leading-WSP value.  TODO confirm.
        pos = pos + 1
    return ''.join(vchars), ''.join([fragment[pos:]] + remainder), had_qp

def _decode_ew_run(value):
    """ Decode a run of RFC2047 encoded words.

    _decode_ew_run(value) -> (text, value, defects)

    Scans the supplied value for a run of tokens that look like they are RFC
    2047 encoded words, decodes those words into text according to RFC 2047
    rules (whitespace between encoded words is discarded), and returns the text
    and the remaining value (including any leading whitespace on the remaining
    value), as well as a list of any defects encountered while decoding.  The
    input value may not have any leading whitespace.

    """
    res = []
    defects = []
    last_ws = ''
    while value:
        try:
            tok, ws, value = _wsp_splitter(value, 1)
        except ValueError:
            # No more white space: the rest of the value is one token.
            tok, ws, value = value, '', ''
        if not (tok.startswith('=?') and tok.endswith('?=')):
            # Not an encoded word: the run has ended.
            return ''.join(res), last_ws + tok + ws + value, defects
        text, charset, lang, new_defects = _ew.decode(tok)
        res.append(text)
        defects.extend(new_defects)
        last_ws = ws
    return ''.join(res), last_ws, defects
+
def get_fws(value):
    """FWS = 1*WSP

    This isn't the RFC definition.  We're using fws to represent tokens where
    folding can be done, but when we are parsing the *un*folding has already
    been done so we don't need to watch out for CRLF.

    """
    newvalue = value.lstrip()
    fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws')
    return fws, newvalue

def get_encoded_word(value):
    """ encoded-word = "=?" charset "?" encoding "?" encoded-text "?="

    """
    ew = EncodedWord()
    if not value.startswith('=?'):
        raise errors.HeaderParseError(
            "expected encoded word but found {}".format(value))
    # 3to2 artifact: equivalent to ``tok, *remainder = value[2:].split('?=', 1)``.
    _3to2list1 = list(value[2:].split('?=', 1))
    tok, remainder, = _3to2list1[:1] + [_3to2list1[1:]]
    if tok == value[2:]:
        # No '?=' terminator was found.
        raise errors.HeaderParseError(
            "expected encoded word but found {}".format(value))
    remstr = ''.join(remainder)
    if remstr[:2].isdigit():
        # The '?=' we split on was actually inside the encoded text
        # (e.g. base64 digits); re-attach through the next '?='.
        _3to2list3 = list(remstr.split('?=', 1))
        rest, remainder, = _3to2list3[:1] + [_3to2list3[1:]]
        tok = tok + '?=' + rest
    if len(tok.split()) > 1:
        ew.defects.append(errors.InvalidHeaderDefect(
            "whitespace inside encoded word"))
    ew.cte = value
    value = ''.join(remainder)
    try:
        text, charset, lang, defects = _ew.decode('=?' + tok + '?=')
    except ValueError:
        raise errors.HeaderParseError(
            "encoded word format invalid: '{}'".format(ew.cte))
    ew.charset = charset
    ew.lang = lang
    ew.defects.extend(defects)
    # Tokenize the decoded text into fws/vtext children.
    while text:
        if text[0] in WSP:
            token, text = get_fws(text)
            ew.append(token)
            continue
        _3to2list5 = list(_wsp_splitter(text, 1))
        chars, remainder, = _3to2list5[:1] + [_3to2list5[1:]]
        vtext = ValueTerminal(chars, 'vtext')
        _validate_xtext(vtext)
        ew.append(vtext)
        text = ''.join(remainder)
    return ew, value
+
def get_unstructured(value):
    """unstructured = (*([FWS] vchar) *WSP) / obs-unstruct
       obs-unstruct = *((*LF *CR *(obs-utext) *LF *CR)) / FWS)
       obs-utext = %d0 / obs-NO-WS-CTL / LF / CR

       obs-NO-WS-CTL is control characters except WSP/CR/LF.

    So, basically, we have printable runs, plus control characters or nulls in
    the obsolete syntax, separated by whitespace.  Since RFC 2047 uses the
    obsolete syntax in its specification, but requires whitespace on either
    side of the encoded words, I can see no reason to need to separate the
    non-printable-non-whitespace from the printable runs if they occur, so we
    parse this into xtext tokens separated by WSP tokens.

    Because an 'unstructured' value must by definition constitute the entire
    value, this 'get' routine does not return a remaining value, only the
    parsed TokenList.

    """
    # XXX: but what about bare CR and LF?  They might signal the start or
    # end of an encoded word.  YAGNI for now, since our current parsers
    # will never send us strings with bare CR or LF.

    unstructured = UnstructuredTokenList()
    while value:
        if value[0] in WSP:
            token, value = get_fws(value)
            unstructured.append(token)
            continue
        if value.startswith('=?'):
            try:
                token, value = get_encoded_word(value)
            except errors.HeaderParseError:
                # Not really an encoded word; fall through and treat it
                # as ordinary vtext.
                pass
            else:
                have_ws = True
                if len(unstructured) > 0:
                    if unstructured[-1].token_type != 'fws':
                        unstructured.defects.append(errors.InvalidHeaderDefect(
                            "missing whitespace before encoded word"))
                        have_ws = False
                if have_ws and len(unstructured) > 1:
                    if unstructured[-2].token_type == 'encoded-word':
                        # WS between two encoded words is semantically
                        # transparent (RFC 2047).
                        unstructured[-1] = EWWhiteSpaceTerminal(
                            unstructured[-1], 'fws')
                unstructured.append(token)
                continue
        # 3to2 artifact: equivalent to ``tok, *remainder = _wsp_splitter(...)``.
        _3to2list7 = list(_wsp_splitter(value, 1))
        tok, remainder, = _3to2list7[:1] + [_3to2list7[1:]]
        vtext = ValueTerminal(tok, 'vtext')
        _validate_xtext(vtext)
        unstructured.append(vtext)
        value = ''.join(remainder)
    return unstructured
+
def get_qp_ctext(value):
    """ctext = <printable ascii except \\ ( )>

    This is not the RFC ctext, since we are handling nested comments in comment
    and unquoting quoted-pairs here.  We allow anything except the '()'
    characters, but if we find any ASCII other than the RFC defined printable
    ASCII a NonPrintableDefect is added to the token's defects list.  Since
    quoted pairs are converted to their unquoted values, what is returned is
    a 'ptext' token.  In this case it is a WhiteSpaceTerminal, so its value
    is ' '.

    """
    ptext, value, _ = _get_ptext_to_endchars(value, '()')
    ptext = WhiteSpaceTerminal(ptext, 'ptext')
    _validate_xtext(ptext)
    return ptext, value

def get_qcontent(value):
    """qcontent = qtext / quoted-pair

    We allow anything except the DQUOTE character, but if we find any ASCII
    other than the RFC defined printable ASCII a NonPrintableDefect is
    added to the token's defects list.  Any quoted pairs are converted to their
    unquoted values, so what is returned is a 'ptext' token.  In this case it
    is a ValueTerminal.

    """
    ptext, value, _ = _get_ptext_to_endchars(value, '"')
    ptext = ValueTerminal(ptext, 'ptext')
    _validate_xtext(ptext)
    return ptext, value

def get_atext(value):
    """atext = <matches _non_atom_end_matcher>

    We allow any non-ATOM_ENDS in atext, but add an InvalidATextDefect to
    the token's defects list if we find non-atext characters.
    """
    m = _non_atom_end_matcher(value)
    if not m:
        raise errors.HeaderParseError(
            "expected atext but found '{}'".format(value))
    atext = m.group()
    value = value[len(atext):]
    atext = ValueTerminal(atext, 'atext')
    _validate_xtext(atext)
    return atext, value
+
def get_bare_quoted_string(value):
    """bare-quoted-string = DQUOTE *([FWS] qcontent) [FWS] DQUOTE

    A quoted-string without the leading or trailing white space.  Its
    value is the text between the quote marks, with whitespace
    preserved and quoted pairs decoded.
    """
    if value[0] != '"':
        raise errors.HeaderParseError(
            "expected '\"' but found '{}'".format(value))
    bare_quoted_string = BareQuotedString()
    value = value[1:]
    while value and value[0] != '"':
        if value[0] in WSP:
            token, value = get_fws(value)
        else:
            token, value = get_qcontent(value)
        bare_quoted_string.append(token)
    if not value:
        # Missing closing quote: record a defect rather than raising.
        bare_quoted_string.defects.append(errors.InvalidHeaderDefect(
            "end of header inside quoted string"))
        return bare_quoted_string, value
    return bare_quoted_string, value[1:]

def get_comment(value):
    """comment = "(" *([FWS] ccontent) [FWS] ")"
       ccontent = ctext / quoted-pair / comment

    We handle nested comments here, and quoted-pair in our qp-ctext routine.
    """
    if value and value[0] != '(':
        raise errors.HeaderParseError(
            "expected '(' but found '{}'".format(value))
    comment = Comment()
    value = value[1:]
    while value and value[0] != ")":
        if value[0] in WSP:
            token, value = get_fws(value)
        elif value[0] == '(':
            # Nested comment: recurse.
            token, value = get_comment(value)
        else:
            token, value = get_qp_ctext(value)
        comment.append(token)
    if not value:
        comment.defects.append(errors.InvalidHeaderDefect(
            "end of header inside comment"))
        return comment, value
    return comment, value[1:]

def get_cfws(value):
    """CFWS = (1*([FWS] comment) [FWS]) / FWS

    """
    cfws = CFWSList()
    while value and value[0] in CFWS_LEADER:
        if value[0] in WSP:
            token, value = get_fws(value)
        else:
            token, value = get_comment(value)
        cfws.append(token)
    return cfws, value
+
def get_quoted_string(value):
    """quoted-string = [CFWS] <bare-quoted-string> [CFWS]

    'bare-quoted-string' is an intermediate class defined by this
    parser and not by the RFC grammar.  It is the quoted string
    without any attached CFWS.
    """
    quoted_string = QuotedString()
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        quoted_string.append(token)
    token, value = get_bare_quoted_string(value)
    quoted_string.append(token)
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        quoted_string.append(token)
    return quoted_string, value

def get_atom(value):
    """atom = [CFWS] 1*atext [CFWS]

    """
    atom = Atom()
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        atom.append(token)
    if value and value[0] in ATOM_ENDS:
        raise errors.HeaderParseError(
            "expected atom but found '{}'".format(value))
    token, value = get_atext(value)
    atom.append(token)
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        atom.append(token)
    return atom, value

def get_dot_atom_text(value):
    """ dot-text = 1*atext *("." 1*atext)

    """
    dot_atom_text = DotAtomText()
    if not value or value[0] in ATOM_ENDS:
        raise errors.HeaderParseError("expected atom at a start of "
            "dot-atom-text but found '{}'".format(value))
    while value and value[0] not in ATOM_ENDS:
        token, value = get_atext(value)
        dot_atom_text.append(token)
        if value and value[0] == '.':
            dot_atom_text.append(DOT)
            value = value[1:]
    if dot_atom_text[-1] is DOT:
        # A trailing dot is not valid dot-atom-text.
        raise errors.HeaderParseError("expected atom at end of dot-atom-text "
            "but found '{}'".format('.'+value))
    return dot_atom_text, value

def get_dot_atom(value):
    """ dot-atom = [CFWS] dot-atom-text [CFWS]

    """
    dot_atom = DotAtom()
    # NOTE(review): unlike get_atom/get_quoted_string this indexes value[0]
    # without an emptiness guard -- callers presumably never pass ''.
    # TODO confirm.
    if value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        dot_atom.append(token)
    token, value = get_dot_atom_text(value)
    dot_atom.append(token)
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        dot_atom.append(token)
    return dot_atom, value
+
+def get_word(value):
+    """word = atom / quoted-string
+
+    Either atom or quoted-string may start with CFWS.  We have to peel off this
+    CFWS first to determine which type of word to parse.  Afterward we splice
+    the leading CFWS, if any, into the parsed sub-token.
+
+    If neither an atom or a quoted-string is found before the next special, a
+    HeaderParseError is raised.
+
+    The token returned is either an Atom or a QuotedString, as appropriate.
+    This means the 'word' level of the formal grammar is not represented in the
+    parse tree; this is because having that extra layer when manipulating the
+    parse tree is more confusing than it is helpful.
+
+    """
+    # NOTE(review): assumes value is non-empty; if value consists solely of
+    # CFWS, the second value[0] below raises IndexError.  Callers appear to
+    # guard against this -- confirm.
+    if value[0] in CFWS_LEADER:
+        leader, value = get_cfws(value)
+    else:
+        leader = None
+    if value[0]=='"':
+        token, value = get_quoted_string(value)
+    elif value[0] in SPECIALS:
+        raise errors.HeaderParseError("Expected 'atom' or 'quoted-string' "
+                                      "but found '{}'".format(value))
+    else:
+        token, value = get_atom(value)
+    if leader is not None:
+        # Splice the leading CFWS into the front of the parsed token.
+        token[:0] = [leader]
+    return token, value
+
+def get_phrase(value):
+    """ phrase = 1*word / obs-phrase
+        obs-phrase = word *(word / "." / CFWS)
+
+    This means a phrase can be a sequence of words, periods, and CFWS in any
+    order as long as it starts with at least one word.  If anything other than
+    words is detected, an ObsoleteHeaderDefect is added to the token's defect
+    list.  We also accept a phrase that starts with CFWS followed by a dot;
+    this is registered as an InvalidHeaderDefect, since it is not supported by
+    even the obsolete grammar.
+
+    """
+    phrase = Phrase()
+    try:
+        token, value = get_word(value)
+        phrase.append(token)
+    except errors.HeaderParseError:
+        phrase.defects.append(errors.InvalidHeaderDefect(
+            "phrase does not start with word"))
+    while value and value[0] not in PHRASE_ENDS:
+        if value[0]=='.':
+            # Periods are only valid under the obsolete grammar.
+            phrase.append(DOT)
+            phrase.defects.append(errors.ObsoleteHeaderDefect(
+                "period in 'phrase'"))
+            value = value[1:]
+        else:
+            try:
+                token, value = get_word(value)
+            except errors.HeaderParseError:
+                if value[0] in CFWS_LEADER:
+                    # Bare CFWS between words: obsolete but tolerated.
+                    token, value = get_cfws(value)
+                    phrase.defects.append(errors.ObsoleteHeaderDefect(
+                        "comment found without atom"))
+                else:
+                    raise
+            phrase.append(token)
+    return phrase, value
+
+def get_local_part(value):
+ """ local-part = dot-atom / quoted-string / obs-local-part
+
+ """
+ local_part = LocalPart()
+ leader = None
+ if value[0] in CFWS_LEADER:
+ leader, value = get_cfws(value)
+ if not value:
+ raise errors.HeaderParseError(
+ "expected local-part but found '{}'".format(value))
+ try:
+ token, value = get_dot_atom(value)
+ except errors.HeaderParseError:
+ try:
+ token, value = get_word(value)
+ except errors.HeaderParseError:
+ if value[0] != '\\' and value[0] in PHRASE_ENDS:
+ raise
+ token = TokenList()
+ if leader is not None:
+ token[:0] = [leader]
+ local_part.append(token)
+ if value and (value[0]=='\\' or value[0] not in PHRASE_ENDS):
+ obs_local_part, value = get_obs_local_part(str(local_part) + value)
+ if obs_local_part.token_type == 'invalid-obs-local-part':
+ local_part.defects.append(errors.InvalidHeaderDefect(
+ "local-part is not dot-atom, quoted-string, or obs-local-part"))
+ else:
+ local_part.defects.append(errors.ObsoleteHeaderDefect(
+ "local-part is not a dot-atom (contains CFWS)"))
+ local_part[0] = obs_local_part
+ try:
+ local_part.value.encode('ascii')
+ except UnicodeEncodeError:
+ local_part.defects.append(errors.NonASCIILocalPartDefect(
+ "local-part contains non-ASCII characters)"))
+ return local_part, value
+
+def get_obs_local_part(value):
+    """ obs-local-part = word *("." word)
+
+    Lenient parse of a local part using the obsolete grammar.  Repeated,
+    leading or trailing dots, stray backslashes, and missing dots between
+    words are recorded as defects; if any defect is found the token_type
+    becomes 'invalid-obs-local-part'.
+    """
+    obs_local_part = ObsLocalPart()
+    last_non_ws_was_dot = False
+    while value and (value[0]=='\\' or value[0] not in PHRASE_ENDS):
+        if value[0] == '.':
+            if last_non_ws_was_dot:
+                obs_local_part.defects.append(errors.InvalidHeaderDefect(
+                    "invalid repeated '.'"))
+            obs_local_part.append(DOT)
+            last_non_ws_was_dot = True
+            value = value[1:]
+            continue
+        elif value[0]=='\\':
+            obs_local_part.append(ValueTerminal(value[0],
+                                                'misplaced-special'))
+            value = value[1:]
+            obs_local_part.defects.append(errors.InvalidHeaderDefect(
+                "'\\' character outside of quoted-string/ccontent"))
+            last_non_ws_was_dot = False
+            continue
+        if obs_local_part and obs_local_part[-1].token_type != 'dot':
+            obs_local_part.defects.append(errors.InvalidHeaderDefect(
+                "missing '.' between words"))
+        try:
+            token, value = get_word(value)
+            last_non_ws_was_dot = False
+        except errors.HeaderParseError:
+            if value[0] not in CFWS_LEADER:
+                raise
+            token, value = get_cfws(value)
+        obs_local_part.append(token)
+    # NOTE(review): if the loop never ran, obs_local_part[0] below raises
+    # IndexError; the caller only invokes this with consumable input -- confirm.
+    if (obs_local_part[0].token_type == 'dot' or
+            obs_local_part[0].token_type=='cfws' and
+            obs_local_part[1].token_type=='dot'):
+        obs_local_part.defects.append(errors.InvalidHeaderDefect(
+            "Invalid leading '.' in local part"))
+    if (obs_local_part[-1].token_type == 'dot' or
+            obs_local_part[-1].token_type=='cfws' and
+            obs_local_part[-2].token_type=='dot'):
+        obs_local_part.defects.append(errors.InvalidHeaderDefect(
+            "Invalid trailing '.' in local part"))
+    if obs_local_part.defects:
+        obs_local_part.token_type = 'invalid-obs-local-part'
+    return obs_local_part, value
+
+def get_dtext(value):
+    """ dtext = <printable ascii except backslash, '[' and ']'> / obs-dtext
+        obs-dtext = obs-NO-WS-CTL / quoted-pair
+
+    We allow anything except the excluded characters, but if we find any
+    ASCII other than the RFC defined printable ASCII an NonPrintableDefect is
+    added to the token's defects list.  Quoted pairs are converted to their
+    unquoted values, so what is returned is a ptext token, in this case a
+    ValueTerminal.  If there were quoted-printables, an ObsoleteHeaderDefect is
+    added to the returned token's defect list.
+
+    """
+    ptext, value, had_qp = _get_ptext_to_endchars(value, '[]')
+    ptext = ValueTerminal(ptext, 'ptext')
+    if had_qp:
+        ptext.defects.append(errors.ObsoleteHeaderDefect(
+            "quoted printable found in domain-literal"))
+    _validate_xtext(ptext)
+    return ptext, value
+
+def _check_for_early_dl_end(value, domain_literal):
+ if value:
+ return False
+ domain_literal.append(errors.InvalidHeaderDefect(
+ "end of input inside domain-literal"))
+ domain_literal.append(ValueTerminal(']', 'domain-literal-end'))
+ return True
+
+def get_domain_literal(value):
+    """ domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
+
+    Returns (DomainLiteral, remainder).  Input that ends before the closing
+    ']' is handled by _check_for_early_dl_end rather than raising.
+    """
+    domain_literal = DomainLiteral()
+    if value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        domain_literal.append(token)
+    if not value:
+        raise errors.HeaderParseError("expected domain-literal")
+    if value[0] != '[':
+        raise errors.HeaderParseError("expected '[' at start of domain-literal "
+                                      "but found '{}'".format(value))
+    value = value[1:]
+    if _check_for_early_dl_end(value, domain_literal):
+        return domain_literal, value
+    domain_literal.append(ValueTerminal('[', 'domain-literal-start'))
+    if value[0] in WSP:
+        token, value = get_fws(value)
+        domain_literal.append(token)
+    token, value = get_dtext(value)
+    domain_literal.append(token)
+    if _check_for_early_dl_end(value, domain_literal):
+        return domain_literal, value
+    if value[0] in WSP:
+        token, value = get_fws(value)
+        domain_literal.append(token)
+    if _check_for_early_dl_end(value, domain_literal):
+        return domain_literal, value
+    if value[0] != ']':
+        raise errors.HeaderParseError("expected ']' at end of domain-literal "
+                                      "but found '{}'".format(value))
+    domain_literal.append(ValueTerminal(']', 'domain-literal-end'))
+    value = value[1:]
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        domain_literal.append(token)
+    return domain_literal, value
+
+def get_domain(value):
+    """ domain = dot-atom / domain-literal / obs-domain
+        obs-domain = atom *("." atom))
+
+    Returns (Domain, remainder).  The obsolete dotted-atoms form is accepted
+    with an ObsoleteHeaderDefect.
+    """
+    domain = Domain()
+    leader = None
+    if value[0] in CFWS_LEADER:
+        leader, value = get_cfws(value)
+    if not value:
+        raise errors.HeaderParseError(
+            "expected domain but found '{}'".format(value))
+    if value[0] == '[':
+        token, value = get_domain_literal(value)
+        if leader is not None:
+            token[:0] = [leader]
+        domain.append(token)
+        return domain, value
+    try:
+        token, value = get_dot_atom(value)
+    except errors.HeaderParseError:
+        token, value = get_atom(value)
+    if leader is not None:
+        token[:0] = [leader]
+    domain.append(token)
+    if value and value[0] == '.':
+        domain.defects.append(errors.ObsoleteHeaderDefect(
+            "domain is not a dot-atom (contains CFWS)"))
+        if domain[0].token_type == 'dot-atom':
+            # Flatten the dot-atom so the obsolete dotted atoms that follow
+            # are appended at the same level of the parse tree.
+            domain[:] = domain[0]
+        while value and value[0] == '.':
+            domain.append(DOT)
+            token, value = get_atom(value[1:])
+            domain.append(token)
+    return domain, value
+
+def get_addr_spec(value):
+ """ addr-spec = local-part "@" domain
+
+ """
+ addr_spec = AddrSpec()
+ token, value = get_local_part(value)
+ addr_spec.append(token)
+ if not value or value[0] != '@':
+ addr_spec.defects.append(errors.InvalidHeaderDefect(
+ "add-spec local part with no domain"))
+ return addr_spec, value
+ addr_spec.append(ValueTerminal('@', 'address-at-symbol'))
+ token, value = get_domain(value[1:])
+ addr_spec.append(token)
+ return addr_spec, value
+
+def get_obs_route(value):
+    """ obs-route = obs-domain-list ":"
+        obs-domain-list = *(CFWS / ",") "@" domain *("," [CFWS] ["@" domain])
+
+    Returns an obs-route token with the appropriate sub-tokens (that is,
+    there is no obs-domain-list in the parse tree).
+    """
+    obs_route = ObsRoute()
+    # Skip any leading CFWS and empty list elements.
+    while value and (value[0]==',' or value[0] in CFWS_LEADER):
+        if value[0] in CFWS_LEADER:
+            token, value = get_cfws(value)
+            obs_route.append(token)
+        elif value[0] == ',':
+            obs_route.append(ListSeparator)
+            value = value[1:]
+    if not value or value[0] != '@':
+        raise errors.HeaderParseError(
+            "expected obs-route domain but found '{}'".format(value))
+    obs_route.append(RouteComponentMarker)
+    token, value = get_domain(value[1:])
+    obs_route.append(token)
+    while value and value[0]==',':
+        obs_route.append(ListSeparator)
+        value = value[1:]
+        if not value:
+            break
+        if value[0] in CFWS_LEADER:
+            token, value = get_cfws(value)
+            obs_route.append(token)
+        if value[0] == '@':
+            obs_route.append(RouteComponentMarker)
+            token, value = get_domain(value[1:])
+            obs_route.append(token)
+    if not value:
+        raise errors.HeaderParseError("end of header while parsing obs-route")
+    if value[0] != ':':
+        raise errors.HeaderParseError( "expected ':' marking end of "
+            "obs-route but found '{}'".format(value))
+    obs_route.append(ValueTerminal(':', 'end-of-obs-route-marker'))
+    return obs_route, value[1:]
+
+def get_angle_addr(value):
+    """ angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr
+        obs-angle-addr = [CFWS] "<" obs-route addr-spec ">" [CFWS]
+
+    """
+    angle_addr = AngleAddr()
+    if value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        angle_addr.append(token)
+    if not value or value[0] != '<':
+        raise errors.HeaderParseError(
+            "expected angle-addr but found '{}'".format(value))
+    angle_addr.append(ValueTerminal('<', 'angle-addr-start'))
+    value = value[1:]
+    # Although it is not legal per RFC5322, SMTP uses '<>' in certain
+    # circumstances.
+    # NOTE(review): if the input ends right after '<', value[0] below raises
+    # IndexError -- confirm callers never pass a header truncated here.
+    if value[0] == '>':
+        angle_addr.append(ValueTerminal('>', 'angle-addr-end'))
+        angle_addr.defects.append(errors.InvalidHeaderDefect(
+            "null addr-spec in angle-addr"))
+        value = value[1:]
+        return angle_addr, value
+    try:
+        token, value = get_addr_spec(value)
+    except errors.HeaderParseError:
+        try:
+            # Fall back to the obsolete grammar: a route before the addr-spec.
+            token, value = get_obs_route(value)
+            angle_addr.defects.append(errors.ObsoleteHeaderDefect(
+                "obsolete route specification in angle-addr"))
+        except errors.HeaderParseError:
+            raise errors.HeaderParseError(
+                "expected addr-spec or obs-route but found '{}'".format(value))
+        angle_addr.append(token)
+        token, value = get_addr_spec(value)
+    angle_addr.append(token)
+    if value and value[0] == '>':
+        value = value[1:]
+    else:
+        angle_addr.defects.append(errors.InvalidHeaderDefect(
+            "missing trailing '>' on angle-addr"))
+    # The '>' terminal is synthesized even when missing from the input.
+    angle_addr.append(ValueTerminal('>', 'angle-addr-end'))
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        angle_addr.append(token)
+    return angle_addr, value
+
+def get_display_name(value):
+ """ display-name = phrase
+
+ Because this is simply a name-rule, we don't return a display-name
+ token containing a phrase, but rather a display-name token with
+ the content of the phrase.
+
+ """
+ display_name = DisplayName()
+ token, value = get_phrase(value)
+ display_name.extend(token[:])
+ display_name.defects = token.defects[:]
+ return display_name, value
+
+
+def get_name_addr(value):
+    """ name-addr = [display-name] angle-addr
+
+    """
+    name_addr = NameAddr()
+    # Both the optional display name and the angle-addr can start with cfws.
+    leader = None
+    if value[0] in CFWS_LEADER:
+        leader, value = get_cfws(value)
+        if not value:
+            raise errors.HeaderParseError(
+                "expected name-addr but found '{}'".format(leader))
+    if value[0] != '<':
+        if value[0] in PHRASE_ENDS:
+            raise errors.HeaderParseError(
+                "expected name-addr but found '{}'".format(value))
+        token, value = get_display_name(value)
+        if not value:
+            raise errors.HeaderParseError(
+                "expected name-addr but found '{}'".format(token))
+        if leader is not None:
+            # The leading CFWS belongs inside the display name's first token.
+            token[0][:0] = [leader]
+            leader = None
+        name_addr.append(token)
+    token, value = get_angle_addr(value)
+    if leader is not None:
+        token[:0] = [leader]
+    name_addr.append(token)
+    return name_addr, value
+
+def get_mailbox(value):
+    """ mailbox = name-addr / addr-spec
+
+    Returns (Mailbox, remainder); the token_type is changed to
+    'invalid-mailbox' if the parsed sub-token carries any
+    InvalidHeaderDefect.
+    """
+    # The only way to figure out if we are dealing with a name-addr or an
+    # addr-spec is to try parsing each one.
+    mailbox = Mailbox()
+    try:
+        token, value = get_name_addr(value)
+    except errors.HeaderParseError:
+        try:
+            token, value = get_addr_spec(value)
+        except errors.HeaderParseError:
+            raise errors.HeaderParseError(
+                "expected mailbox but found '{}'".format(value))
+    if any(isinstance(x, errors.InvalidHeaderDefect)
+           for x in token.all_defects):
+        mailbox.token_type = 'invalid-mailbox'
+    mailbox.append(token)
+    return mailbox, value
+
+def get_invalid_mailbox(value, endchars):
+ """ Read everything up to one of the chars in endchars.
+
+ This is outside the formal grammar. The InvalidMailbox TokenList that is
+ returned acts like a Mailbox, but the data attributes are None.
+
+ """
+ invalid_mailbox = InvalidMailbox()
+ while value and value[0] not in endchars:
+ if value[0] in PHRASE_ENDS:
+ invalid_mailbox.append(ValueTerminal(value[0],
+ 'misplaced-special'))
+ value = value[1:]
+ else:
+ token, value = get_phrase(value)
+ invalid_mailbox.append(token)
+ return invalid_mailbox, value
+
+def get_mailbox_list(value):
+    """ mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list
+        obs-mbox-list = *([CFWS] ",") mailbox *("," [mailbox / CFWS])
+
+    For this routine we go outside the formal grammar in order to improve error
+    handling.  We recognize the end of the mailbox list only at the end of the
+    value or at a ';' (the group terminator).  This is so that we can turn
+    invalid mailboxes into InvalidMailbox tokens and continue parsing any
+    remaining valid mailboxes.  We also allow all mailbox entries to be null,
+    and this condition is handled appropriately at a higher level.
+
+    """
+    mailbox_list = MailboxList()
+    while value and value[0] != ';':
+        try:
+            token, value = get_mailbox(value)
+            mailbox_list.append(token)
+        except errors.HeaderParseError:
+            leader = None
+            if value[0] in CFWS_LEADER:
+                leader, value = get_cfws(value)
+                if not value or value[0] in ',;':
+                    # CFWS-only entry: an empty (obsolete) list element.
+                    mailbox_list.append(leader)
+                    mailbox_list.defects.append(errors.ObsoleteHeaderDefect(
+                        "empty element in mailbox-list"))
+                else:
+                    token, value = get_invalid_mailbox(value, ',;')
+                    if leader is not None:
+                        token[:0] = [leader]
+                    mailbox_list.append(token)
+                    mailbox_list.defects.append(errors.InvalidHeaderDefect(
+                        "invalid mailbox in mailbox-list"))
+            elif value[0] == ',':
+                mailbox_list.defects.append(errors.ObsoleteHeaderDefect(
+                    "empty element in mailbox-list"))
+            else:
+                token, value = get_invalid_mailbox(value, ',;')
+                if leader is not None:
+                    token[:0] = [leader]
+                mailbox_list.append(token)
+                mailbox_list.defects.append(errors.InvalidHeaderDefect(
+                    "invalid mailbox in mailbox-list"))
+        if value and value[0] not in ',;':
+            # Crap after mailbox; treat it as an invalid mailbox.
+            # The mailbox info will still be available.
+            mailbox = mailbox_list[-1]
+            mailbox.token_type = 'invalid-mailbox'
+            token, value = get_invalid_mailbox(value, ',;')
+            mailbox.extend(token)
+            mailbox_list.defects.append(errors.InvalidHeaderDefect(
+                "invalid mailbox in mailbox-list"))
+        if value and value[0] == ',':
+            mailbox_list.append(ListSeparator)
+            value = value[1:]
+    return mailbox_list, value
+
+
+def get_group_list(value):
+    """ group-list = mailbox-list / CFWS / obs-group-list
+        obs-group-list = 1*([CFWS] ",") [CFWS]
+
+    Returns (GroupList, remainder); truncated input is recorded as a defect
+    rather than raising.
+    """
+    group_list = GroupList()
+    if not value:
+        group_list.defects.append(errors.InvalidHeaderDefect(
+            "end of header before group-list"))
+        return group_list, value
+    leader = None
+    if value and value[0] in CFWS_LEADER:
+        leader, value = get_cfws(value)
+        if not value:
+            # This should never happen in email parsing, since CFWS-only is a
+            # legal alternative to group-list in a group, which is the only
+            # place group-list appears.
+            group_list.defects.append(errors.InvalidHeaderDefect(
+                "end of header in group-list"))
+            group_list.append(leader)
+            return group_list, value
+        if value[0] == ';':
+            # CFWS-only group-list.
+            group_list.append(leader)
+            return group_list, value
+    token, value = get_mailbox_list(value)
+    if len(token.all_mailboxes)==0:
+        if leader is not None:
+            group_list.append(leader)
+        group_list.extend(token)
+        group_list.defects.append(errors.ObsoleteHeaderDefect(
+            "group-list with empty entries"))
+        return group_list, value
+    if leader is not None:
+        token[:0] = [leader]
+    group_list.append(token)
+    return group_list, value
+
+def get_group(value):
+ """ group = display-name ":" [group-list] ";" [CFWS]
+
+ """
+ group = Group()
+ token, value = get_display_name(value)
+ if not value or value[0] != ':':
+ raise errors.HeaderParseError("expected ':' at end of group "
+ "display name but found '{}'".format(value))
+ group.append(token)
+ group.append(ValueTerminal(':', 'group-display-name-terminator'))
+ value = value[1:]
+ if value and value[0] == ';':
+ group.append(ValueTerminal(';', 'group-terminator'))
+ return group, value[1:]
+ token, value = get_group_list(value)
+ group.append(token)
+ if not value:
+ group.defects.append(errors.InvalidHeaderDefect(
+ "end of header in group"))
+ if value[0] != ';':
+ raise errors.HeaderParseError(
+ "expected ';' at end of group but found {}".format(value))
+ group.append(ValueTerminal(';', 'group-terminator'))
+ value = value[1:]
+ if value and value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ group.append(token)
+ return group, value
+
+def get_address(value):
+    """ address = mailbox / group
+
+    Note that counter-intuitively, an address can be either a single address or
+    a list of addresses (a group).  This is why the returned Address object has
+    a 'mailboxes' attribute which treats a single address as a list of length
+    one.  When you need to differentiate between the two cases, extract the
+    single element, which is either a mailbox or a group token.
+
+    """
+    # The formal grammar isn't very helpful when parsing an address.  mailbox
+    # and group, especially when allowing for obsolete forms, start off very
+    # similarly.  It is only when you reach one of @, <, or : that you know
+    # what you've got.  So, we try each one in turn, starting with the more
+    # likely of the two.  We could perhaps make this more efficient by looking
+    # for a phrase and then branching based on the next character, but that
+    # would be a premature optimization.
+    address = Address()
+    try:
+        token, value = get_group(value)
+    except errors.HeaderParseError:
+        try:
+            token, value = get_mailbox(value)
+        except errors.HeaderParseError:
+            raise errors.HeaderParseError(
+                "expected address but found '{}'".format(value))
+    address.append(token)
+    return address, value
+
+def get_address_list(value):
+ """ address_list = (address *("," address)) / obs-addr-list
+ obs-addr-list = *([CFWS] ",") address *("," [address / CFWS])
+
+ We depart from the formal grammar here by continuing to parse until the end
+ of the input, assuming the input to be entirely composed of an
+ address-list. This is always true in email parsing, and allows us
+ to skip invalid addresses to parse additional valid ones.
+
+ """
+ address_list = AddressList()
+ while value:
+ try:
+ token, value = get_address(value)
+ address_list.append(token)
+ except errors.HeaderParseError as err:
+ leader = None
+ if value[0] in CFWS_LEADER:
+ leader, value = get_cfws(value)
+ if not value or value[0] == ',':
+ address_list.append(leader)
+ address_list.defects.append(errors.ObsoleteHeaderDefect(
+ "address-list entry with no content"))
+ else:
+ token, value = get_invalid_mailbox(value, ',')
+ if leader is not None:
+ token[:0] = [leader]
+ address_list.append(Address([token]))
+ address_list.defects.append(errors.InvalidHeaderDefect(
+ "invalid address in address-list"))
+ elif value[0] == ',':
+ address_list.defects.append(errors.ObsoleteHeaderDefect(
+ "empty element in address-list"))
+ else:
+ token, value = get_invalid_mailbox(value, ',')
+ if leader is not None:
+ token[:0] = [leader]
+ address_list.append(Address([token]))
+ address_list.defects.append(errors.InvalidHeaderDefect(
+ "invalid address in address-list"))
+ if value and value[0] != ',':
+ # Crap after address; treat it as an invalid mailbox.
+ # The mailbox info will still be available.
+ mailbox = address_list[-1][0]
+ mailbox.token_type = 'invalid-mailbox'
+ token, value = get_invalid_mailbox(value, ',')
+ mailbox.extend(token)
+ address_list.defects.append(errors.InvalidHeaderDefect(
+ "invalid address in address-list"))
+ if value: # Must be a , at this point.
+ address_list.append(ValueTerminal(',', 'list-separator'))
+ value = value[1:]
+ return address_list, value
+
+#
+# XXX: As I begin to add additional header parsers, I'm realizing we probably
+# have two levels of parser routines: the get_XXX methods that get a token in
+# the grammar, and parse_XXX methods that parse an entire field value.  So
+# get_address_list above should really be a parse_ method, as probably should
+# be get_unstructured.
+#
+
+def parse_mime_version(value):
+    """ mime-version = [CFWS] 1*digit [CFWS] "." [CFWS] 1*digit [CFWS]
+
+    Parse an entire MIME-Version header value.  Unlike the get_XXX routines
+    this returns only a MIMEVersion token (no remainder); malformed values
+    are recorded as defects rather than raising.
+    """
+    # The [CFWS] is implicit in the RFC 2045 BNF.
+    # XXX: This routine is a bit verbose, should factor out a get_int method.
+    mime_version = MIMEVersion()
+    if not value:
+        mime_version.defects.append(errors.HeaderMissingRequiredValue(
+            "Missing MIME version number (eg: 1.0)"))
+        return mime_version
+    if value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        mime_version.append(token)
+        if not value:
+            mime_version.defects.append(errors.HeaderMissingRequiredValue(
+                "Expected MIME version number but found only CFWS"))
+    # Collect the major version digits.
+    digits = ''
+    while value and value[0] != '.' and value[0] not in CFWS_LEADER:
+        digits += value[0]
+        value = value[1:]
+    if not digits.isdigit():
+        mime_version.defects.append(errors.InvalidHeaderDefect(
+            "Expected MIME major version number but found {!r}".format(digits)))
+        mime_version.append(ValueTerminal(digits, 'xtext'))
+    else:
+        mime_version.major = int(digits)
+        mime_version.append(ValueTerminal(digits, 'digits'))
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        mime_version.append(token)
+    if not value or value[0] != '.':
+        if mime_version.major is not None:
+            mime_version.defects.append(errors.InvalidHeaderDefect(
+                "Incomplete MIME version; found only major number"))
+        if value:
+            mime_version.append(ValueTerminal(value, 'xtext'))
+        return mime_version
+    mime_version.append(ValueTerminal('.', 'version-separator'))
+    value = value[1:]
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        mime_version.append(token)
+    if not value:
+        if mime_version.major is not None:
+            mime_version.defects.append(errors.InvalidHeaderDefect(
+                "Incomplete MIME version; found only major number"))
+        return mime_version
+    # Collect the minor version digits.
+    digits = ''
+    while value and value[0] not in CFWS_LEADER:
+        digits += value[0]
+        value = value[1:]
+    if not digits.isdigit():
+        mime_version.defects.append(errors.InvalidHeaderDefect(
+            "Expected MIME minor version number but found {!r}".format(digits)))
+        mime_version.append(ValueTerminal(digits, 'xtext'))
+    else:
+        mime_version.minor = int(digits)
+        mime_version.append(ValueTerminal(digits, 'digits'))
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        mime_version.append(token)
+    if value:
+        mime_version.defects.append(errors.InvalidHeaderDefect(
+            "Excess non-CFWS text after MIME version"))
+        mime_version.append(ValueTerminal(value, 'xtext'))
+    return mime_version
+
+def get_invalid_parameter(value):
+ """ Read everything up to the next ';'.
+
+ This is outside the formal grammar. The InvalidParameter TokenList that is
+ returned acts like a Parameter, but the data attributes are None.
+
+ """
+ invalid_parameter = InvalidParameter()
+ while value and value[0] != ';':
+ if value[0] in PHRASE_ENDS:
+ invalid_parameter.append(ValueTerminal(value[0],
+ 'misplaced-special'))
+ value = value[1:]
+ else:
+ token, value = get_phrase(value)
+ invalid_parameter.append(token)
+ return invalid_parameter, value
+
+def get_ttext(value):
+ """ttext =
+
+ We allow any non-TOKEN_ENDS in ttext, but add defects to the token's
+ defects list if we find non-ttext characters. We also register defects for
+ *any* non-printables even though the RFC doesn't exclude all of them,
+ because we follow the spirit of RFC 5322.
+
+ """
+ m = _non_token_end_matcher(value)
+ if not m:
+ raise errors.HeaderParseError(
+ "expected ttext but found '{}'".format(value))
+ ttext = m.group()
+ value = value[len(ttext):]
+ ttext = ValueTerminal(ttext, 'ttext')
+ _validate_xtext(ttext)
+ return ttext, value
+
+def get_token(value):
+    """token = [CFWS] 1*ttext [CFWS]
+
+    The RFC equivalent of ttext is any US-ASCII chars except space, ctls, or
+    tspecials.  We also exclude tabs even though the RFC doesn't.
+
+    The RFC implies the CFWS but is not explicit about it in the BNF.
+
+    """
+    mtoken = Token()
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        mtoken.append(token)
+    if value and value[0] in TOKEN_ENDS:
+        raise errors.HeaderParseError(
+            "expected token but found '{}'".format(value))
+    token, value = get_ttext(value)
+    mtoken.append(token)
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        mtoken.append(token)
+    return mtoken, value
+
+def get_attrtext(value):
+ """attrtext = 1*(any non-ATTRIBUTE_ENDS character)
+
+ We allow any non-ATTRIBUTE_ENDS in attrtext, but add defects to the
+ token's defects list if we find non-attrtext characters. We also register
+ defects for *any* non-printables even though the RFC doesn't exclude all of
+ them, because we follow the spirit of RFC 5322.
+
+ """
+ m = _non_attribute_end_matcher(value)
+ if not m:
+ raise errors.HeaderParseError(
+ "expected attrtext but found {!r}".format(value))
+ attrtext = m.group()
+ value = value[len(attrtext):]
+ attrtext = ValueTerminal(attrtext, 'attrtext')
+ _validate_xtext(attrtext)
+ return attrtext, value
+
+def get_attribute(value):
+    """ [CFWS] 1*attrtext [CFWS]
+
+    This version of the BNF makes the CFWS explicit, and as usual we use a
+    value terminal for the actual run of characters.  The RFC equivalent of
+    attrtext is the token characters, with the subtraction of '*', "'", and '%'.
+    We include tab in the excluded set just as we do for token.
+
+    """
+    attribute = Attribute()
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        attribute.append(token)
+    if value and value[0] in ATTRIBUTE_ENDS:
+        raise errors.HeaderParseError(
+            "expected token but found '{}'".format(value))
+    token, value = get_attrtext(value)
+    attribute.append(token)
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        attribute.append(token)
+    return attribute, value
+
+def get_extended_attrtext(value):
+ """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')
+
+ This is a special parsing routine so that we get a value that
+ includes % escapes as a single string (which we decode as a single
+ string later).
+
+ """
+ m = _non_extended_attribute_end_matcher(value)
+ if not m:
+ raise errors.HeaderParseError(
+ "expected extended attrtext but found {!r}".format(value))
+ attrtext = m.group()
+ value = value[len(attrtext):]
+ attrtext = ValueTerminal(attrtext, 'extended-attrtext')
+ _validate_xtext(attrtext)
+ return attrtext, value
+
+def get_extended_attribute(value):
+    """ [CFWS] 1*extended_attrtext [CFWS]
+
+    This is like the non-extended version except we allow % characters, so that
+    we can pick up an encoded value as a single string.
+
+    """
+    # XXX: should we have an ExtendedAttribute TokenList?
+    attribute = Attribute()
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        attribute.append(token)
+    if value and value[0] in EXTENDED_ATTRIBUTE_ENDS:
+        raise errors.HeaderParseError(
+            "expected token but found '{}'".format(value))
+    token, value = get_extended_attrtext(value)
+    attribute.append(token)
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        attribute.append(token)
+    return attribute, value
+
+def get_section(value):
+ """ '*' digits
+
+ The formal BNF is more complicated because leading 0s are not allowed. We
+ check for that and add a defect. We also assume no CFWS is allowed between
+ the '*' and the digits, though the RFC is not crystal clear on that.
+ The caller should already have dealt with leading CFWS.
+
+ """
+ section = Section()
+ if not value or value[0] != '*':
+ raise errors.HeaderParseError("Expected section but found {}".format(
+ value))
+ section.append(ValueTerminal('*', 'section-marker'))
+ value = value[1:]
+ if not value or not value[0].isdigit():
+ raise errors.HeaderParseError("Expected section number but "
+ "found {}".format(value))
+ digits = ''
+ while value and value[0].isdigit():
+ digits += value[0]
+ value = value[1:]
+ if digits[0] == '0' and digits != '0':
+ section.defects.append(errors.InvalidHeaderError("section number"
+ "has an invalid leading 0"))
+ section.number = int(digits)
+ section.append(ValueTerminal(digits, 'digits'))
+ return section, value
+
+
+def get_value(value):
+    """ quoted-string / attribute
+
+    Returns (Value, remainder); leading CFWS, if any, is spliced into the
+    parsed sub-token.
+    """
+    v = Value()
+    if not value:
+        raise errors.HeaderParseError("Expected value but found end of string")
+    leader = None
+    if value[0] in CFWS_LEADER:
+        leader, value = get_cfws(value)
+    if not value:
+        raise errors.HeaderParseError("Expected value but found "
+                                      "only {}".format(leader))
+    if value[0] == '"':
+        token, value = get_quoted_string(value)
+    else:
+        token, value = get_extended_attribute(value)
+    if leader is not None:
+        token[:0] = [leader]
+    v.append(token)
+    return v, value
+
+def get_parameter(value):
+ """ attribute [section] ["*"] [CFWS] "=" value
+
+ The CFWS is implied by the RFC but not made explicit in the BNF. This
+ simplified form of the BNF from the RFC is made to conform with the RFC BNF
+ through some extra checks. We do it this way because it makes both error
+ recovery and working with the resulting parse tree easier.
+ """
+ # It is possible CFWS would also be implicitly allowed between the section
+ # and the 'extended-attribute' marker (the '*') , but we've never seen that
+ # in the wild and we will therefore ignore the possibility.
+ param = Parameter()
+ token, value = get_attribute(value)
+ param.append(token)
+ if not value or value[0] == ';':
+ param.defects.append(errors.InvalidHeaderDefect("Parameter contains "
+ "name ({}) but no value".format(token)))
+ return param, value
+ if value[0] == '*':
+ try:
+ token, value = get_section(value)
+ param.sectioned = True
+ param.append(token)
+ except errors.HeaderParseError:
+ pass
+ if not value:
+ raise errors.HeaderParseError("Incomplete parameter")
+ if value[0] == '*':
+ param.append(ValueTerminal('*', 'extended-parameter-marker'))
+ value = value[1:]
+ param.extended = True
+ if value[0] != '=':
+ raise errors.HeaderParseError("Parameter not followed by '='")
+ param.append(ValueTerminal('=', 'parameter-separator'))
+ value = value[1:]
+ leader = None
+ if value and value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ param.append(token)
+ remainder = None
+ appendto = param
+ if param.extended and value and value[0] == '"':
+ # Now for some serious hackery to handle the common invalid case of
+ # double quotes around an extended value. We also accept (with defect)
+ # a value marked as encoded that isn't really.
+ qstring, remainder = get_quoted_string(value)
+ inner_value = qstring.stripped_value
+ semi_valid = False
+ if param.section_number == 0:
+ if inner_value and inner_value[0] == "'":
+ semi_valid = True
+ else:
+ token, rest = get_attrtext(inner_value)
+ if rest and rest[0] == "'":
+ semi_valid = True
+ else:
+ try:
+ token, rest = get_extended_attrtext(inner_value)
+ except:
+ pass
+ else:
+ if not rest:
+ semi_valid = True
+ if semi_valid:
+ param.defects.append(errors.InvalidHeaderDefect(
+ "Quoted string value for extended parameter is invalid"))
+ param.append(qstring)
+ for t in qstring:
+ if t.token_type == 'bare-quoted-string':
+ t[:] = []
+ appendto = t
+ break
+ value = inner_value
+ else:
+ remainder = None
+ param.defects.append(errors.InvalidHeaderDefect(
+ "Parameter marked as extended but appears to have a "
+ "quoted string value that is non-encoded"))
+ if value and value[0] == "'":
+ token = None
+ else:
+ token, value = get_value(value)
+ if not param.extended or param.section_number > 0:
+ if not value or value[0] != "'":
+ appendto.append(token)
+ if remainder is not None:
+ assert not value, value
+ value = remainder
+ return param, value
+ param.defects.append(errors.InvalidHeaderDefect(
+ "Apparent initial-extended-value but attribute "
+ "was not marked as extended or was not initial section"))
+ if not value:
+ # Assume the charset/lang is missing and the token is the value.
+ param.defects.append(errors.InvalidHeaderDefect(
+ "Missing required charset/lang delimiters"))
+ appendto.append(token)
+ if remainder is None:
+ return param, value
+ else:
+ if token is not None:
+ for t in token:
+ if t.token_type == 'extended-attrtext':
+ break
+ t.token_type == 'attrtext'
+ appendto.append(t)
+ param.charset = t.value
+ if value[0] != "'":
+ raise errors.HeaderParseError("Expected RFC2231 char/lang encoding "
+ "delimiter, but found {!r}".format(value))
+ appendto.append(ValueTerminal("'", 'RFC2231 delimiter'))
+ value = value[1:]
+ if value and value[0] != "'":
+ token, value = get_attrtext(value)
+ appendto.append(token)
+ param.lang = token.value
+ if not value or value[0] != "'":
+ raise errors.HeaderParseError("Expected RFC2231 char/lang encoding "
+ "delimiter, but found {}".format(value))
+ appendto.append(ValueTerminal("'", 'RFC2231 delimiter'))
+ value = value[1:]
+ if remainder is not None:
+ # Treat the rest of value as bare quoted string content.
+ v = Value()
+ while value:
+ if value[0] in WSP:
+ token, value = get_fws(value)
+ else:
+ token, value = get_qcontent(value)
+ v.append(token)
+ token = v
+ else:
+ token, value = get_value(value)
+ appendto.append(token)
+ if remainder is not None:
+ assert not value, value
+ value = remainder
+ return param, value
+
+def parse_mime_parameters(value):
+    """ parameter *( ";" parameter )
+
+    That BNF is meant to indicate this routine should only be called after
+    finding and handling the leading ';'. There is no corresponding rule in
+    the formal RFC grammar, but it is more convenient for us for the set of
+    parameters to be treated as its own TokenList.
+
+    This is a 'parse' routine because it consumes the remaining value, but it
+    would never be called to parse a full header. Instead it is called to
+    parse everything after the non-parameter value of a specific MIME header.
+
+    """
+    mime_parameters = MimeParameters()
+    while value:
+        try:
+            token, value = get_parameter(value)
+            mime_parameters.append(token)
+        except errors.HeaderParseError as err:
+            # Error recovery: skip any leading CFWS, then decide whether this
+            # was an empty parameter entry or junk to record as invalid.
+            leader = None
+            if value[0] in CFWS_LEADER:
+                leader, value = get_cfws(value)
+            if not value:
+                mime_parameters.append(leader)
+                return mime_parameters
+            if value[0] == ';':
+                if leader is not None:
+                    mime_parameters.append(leader)
+                mime_parameters.defects.append(errors.InvalidHeaderDefect(
+                    "parameter entry with no content"))
+            else:
+                token, value = get_invalid_parameter(value)
+                if leader:
+                    # Keep the skipped CFWS attached to the invalid token.
+                    token[:0] = [leader]
+                mime_parameters.append(token)
+                mime_parameters.defects.append(errors.InvalidHeaderDefect(
+                    "invalid parameter {!r}".format(token)))
+        if value and value[0] != ';':
+            # Junk after the otherwise valid parameter. Mark it as
+            # invalid, but it will have a value.
+            param = mime_parameters[-1]
+            param.token_type = 'invalid-parameter'
+            token, value = get_invalid_parameter(value)
+            param.extend(token)
+            mime_parameters.defects.append(errors.InvalidHeaderDefect(
+                "parameter with invalid trailing text {!r}".format(token)))
+        if value:
+            # Must be a ';' at this point.
+            mime_parameters.append(ValueTerminal(';', 'parameter-separator'))
+            value = value[1:]
+    return mime_parameters
+
+def _find_mime_parameters(tokenlist, value):
+ """Do our best to find the parameters in an invalid MIME header
+
+ """
+ while value and value[0] != ';':
+ if value[0] in PHRASE_ENDS:
+ tokenlist.append(ValueTerminal(value[0], 'misplaced-special'))
+ value = value[1:]
+ else:
+ token, value = get_phrase(value)
+ tokenlist.append(token)
+ if not value:
+ return
+ tokenlist.append(ValueTerminal(';', 'parameter-separator'))
+ tokenlist.append(parse_mime_parameters(value[1:]))
+
+def parse_content_type_header(value):
+ """ maintype "/" subtype *( ";" parameter )
+
+ The maintype and substype are tokens. Theoretically they could
+ be checked against the official IANA list + x-token, but we
+ don't do that.
+ """
+ ctype = ContentType()
+ recover = False
+ if not value:
+ ctype.defects.append(errors.HeaderMissingRequiredValue(
+ "Missing content type specification"))
+ return ctype
+ try:
+ token, value = get_token(value)
+ except errors.HeaderParseError:
+ ctype.defects.append(errors.InvalidHeaderDefect(
+ "Expected content maintype but found {!r}".format(value)))
+ _find_mime_parameters(ctype, value)
+ return ctype
+ ctype.append(token)
+ # XXX: If we really want to follow the formal grammar we should make
+ # mantype and subtype specialized TokenLists here. Probably not worth it.
+ if not value or value[0] != '/':
+ ctype.defects.append(errors.InvalidHeaderDefect(
+ "Invalid content type"))
+ if value:
+ _find_mime_parameters(ctype, value)
+ return ctype
+ ctype.maintype = token.value.strip().lower()
+ ctype.append(ValueTerminal('/', 'content-type-separator'))
+ value = value[1:]
+ try:
+ token, value = get_token(value)
+ except errors.HeaderParseError:
+ ctype.defects.append(errors.InvalidHeaderDefect(
+ "Expected content subtype but found {!r}".format(value)))
+ _find_mime_parameters(ctype, value)
+ return ctype
+ ctype.append(token)
+ ctype.subtype = token.value.strip().lower()
+ if not value:
+ return ctype
+ if value[0] != ';':
+ ctype.defects.append(errors.InvalidHeaderDefect(
+ "Only parameters are valid after content type, but "
+ "found {!r}".format(value)))
+ # The RFC requires that a syntactically invalid content-type be treated
+ # as text/plain. Perhaps we should postel this, but we should probably
+ # only do that if we were checking the subtype value against IANA.
+ del ctype.maintype, ctype.subtype
+ _find_mime_parameters(ctype, value)
+ return ctype
+ ctype.append(ValueTerminal(';', 'parameter-separator'))
+ ctype.append(parse_mime_parameters(value[1:]))
+ return ctype
+
+def parse_content_disposition_header(value):
+ """ disposition-type *( ";" parameter )
+
+ """
+ disp_header = ContentDisposition()
+ if not value:
+ disp_header.defects.append(errors.HeaderMissingRequiredValue(
+ "Missing content disposition"))
+ return disp_header
+ try:
+ token, value = get_token(value)
+ except errors.HeaderParseError:
+ ctype.defects.append(errors.InvalidHeaderDefect(
+ "Expected content disposition but found {!r}".format(value)))
+ _find_mime_parameters(disp_header, value)
+ return disp_header
+ disp_header.append(token)
+ disp_header.content_disposition = token.value.strip().lower()
+ if not value:
+ return disp_header
+ if value[0] != ';':
+ disp_header.defects.append(errors.InvalidHeaderDefect(
+ "Only parameters are valid after content disposition, but "
+ "found {!r}".format(value)))
+ _find_mime_parameters(disp_header, value)
+ return disp_header
+ disp_header.append(ValueTerminal(';', 'parameter-separator'))
+ disp_header.append(parse_mime_parameters(value[1:]))
+ return disp_header
+
+def parse_content_transfer_encoding_header(value):
+ """ mechanism
+
+ """
+ # We should probably validate the values, since the list is fixed.
+ cte_header = ContentTransferEncoding()
+ if not value:
+ cte_header.defects.append(errors.HeaderMissingRequiredValue(
+ "Missing content transfer encoding"))
+ return cte_header
+ try:
+ token, value = get_token(value)
+ except errors.HeaderParseError:
+ ctype.defects.append(errors.InvalidHeaderDefect(
+ "Expected content trnasfer encoding but found {!r}".format(value)))
+ else:
+ cte_header.append(token)
+ cte_header.cte = token.value.strip().lower()
+ if not value:
+ return cte_header
+ while value:
+ cte_header.defects.append(errors.InvalidHeaderDefect(
+ "Extra text after content transfer encoding"))
+ if value[0] in PHRASE_ENDS:
+ cte_header.append(ValueTerminal(value[0], 'misplaced-special'))
+ value = value[1:]
+ else:
+ token, value = get_phrase(value)
+ cte_header.append(token)
+ return cte_header
diff --git a/src/future/backports/email/_parseaddr.py b/src/future/backports/email/_parseaddr.py
new file mode 100644
index 00000000..5b50cc6b
--- /dev/null
+++ b/src/future/backports/email/_parseaddr.py
@@ -0,0 +1,546 @@
+# Copyright (C) 2002-2007 Python Software Foundation
+# Contact: email-sig@python.org
+
+"""Email address parsing code.
+
+Lifted directly from rfc822.py. This should eventually be rewritten.
+"""
+
+from __future__ import unicode_literals
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import int
+
+__all__ = [
+ 'mktime_tz',
+ 'parsedate',
+ 'parsedate_tz',
+ 'quote',
+ ]
+
+import time, calendar
+
+SPACE = ' '
+EMPTYSTRING = ''
+COMMASPACE = ', '
+
+# Parse a date field
+_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
+ 'aug', 'sep', 'oct', 'nov', 'dec',
+ 'january', 'february', 'march', 'april', 'may', 'june', 'july',
+ 'august', 'september', 'october', 'november', 'december']
+
+_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
+
+# The timezone table does not include the military time zones defined
+# in RFC822, other than Z. According to RFC1123, the description in
+# RFC822 gets the signs wrong, so we can't rely on any such time
+# zones. RFC1123 recommends that numeric timezone indicators be used
+# instead of timezone names.
+
+_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
+ 'AST': -400, 'ADT': -300, # Atlantic (used in Canada)
+ 'EST': -500, 'EDT': -400, # Eastern
+ 'CST': -600, 'CDT': -500, # Central
+ 'MST': -700, 'MDT': -600, # Mountain
+ 'PST': -800, 'PDT': -700 # Pacific
+ }
+
+
+def parsedate_tz(data):
+ """Convert a date string to a time tuple.
+
+ Accounts for military timezones.
+ """
+ res = _parsedate_tz(data)
+ if not res:
+ return
+ if res[9] is None:
+ res[9] = 0
+ return tuple(res)
+
+def _parsedate_tz(data):
+    """Convert date to extended time tuple.
+
+    The last (additional) element is the time zone offset in seconds, except if
+    the timezone was specified as -0000. In that case the last element is
+    None. This indicates a UTC timestamp that explicitly declaims knowledge of
+    the source timezone, as opposed to a +0000 timestamp that indicates the
+    source timezone really was UTC.
+
+    """
+    if not data:
+        return
+    data = data.split()
+    # The FWS after the comma after the day-of-week is optional, so search and
+    # adjust for this.
+    if data[0].endswith(',') or data[0].lower() in _daynames:
+        # There's a dayname here. Skip it
+        del data[0]
+    else:
+        i = data[0].rfind(',')
+        if i >= 0:
+            data[0] = data[0][i+1:]
+    if len(data) == 3: # RFC 850 date, deprecated
+        stuff = data[0].split('-')
+        if len(stuff) == 3:
+            data = stuff + data[1:]
+    if len(data) == 4:
+        s = data[3]
+        i = s.find('+')
+        if i == -1:
+            i = s.find('-')
+        if i > 0:
+            # The timezone is glued onto the time (e.g. '12:00:00+0100');
+            # split it into separate fields.
+            data[3:] = [s[:i], s[i:]]
+        else:
+            data.append('') # Dummy tz
+    if len(data) < 5:
+        return None
+    data = data[:5]
+    # From here on: day, month, year, time, timezone (with heuristics below
+    # for inputs that put the fields in a different order).
+    [dd, mm, yy, tm, tz] = data
+    mm = mm.lower()
+    if mm not in _monthnames:
+        # Perhaps the day and month fields were swapped.
+        dd, mm = mm, dd.lower()
+        if mm not in _monthnames:
+            return None
+    mm = _monthnames.index(mm) + 1
+    if mm > 12:
+        # _monthnames lists the twelve abbreviations first, then the full
+        # names, so indexes above 12 map back into 1-12.
+        mm -= 12
+    if dd[-1] == ',':
+        dd = dd[:-1]
+    i = yy.find(':')
+    if i > 0:
+        # The "year" field holds a time; year and time fields were swapped.
+        yy, tm = tm, yy
+    if yy[-1] == ',':
+        yy = yy[:-1]
+    if not yy[0].isdigit():
+        # Non-numeric "year": assume year and timezone fields were swapped.
+        yy, tz = tz, yy
+    if tm[-1] == ',':
+        tm = tm[:-1]
+    tm = tm.split(':')
+    if len(tm) == 2:
+        [thh, tmm] = tm
+        tss = '0'
+    elif len(tm) == 3:
+        [thh, tmm, tss] = tm
+    elif len(tm) == 1 and '.' in tm[0]:
+        # Some non-compliant MUAs use '.' to separate time elements.
+        tm = tm[0].split('.')
+        if len(tm) == 2:
+            [thh, tmm] = tm
+            tss = 0
+        elif len(tm) == 3:
+            [thh, tmm, tss] = tm
+    else:
+        return None
+    try:
+        yy = int(yy)
+        dd = int(dd)
+        thh = int(thh)
+        tmm = int(tmm)
+        tss = int(tss)
+    except ValueError:
+        return None
+    # Check for a yy specified in two-digit format, then convert it to the
+    # appropriate four-digit format, according to the POSIX standard. RFC 822
+    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
+    # mandates a 4-digit yy. For more information, see the documentation for
+    # the time module.
+    if yy < 100:
+        # The year is between 1969 and 1999 (inclusive).
+        if yy > 68:
+            yy += 1900
+        # The year is between 2000 and 2068 (inclusive).
+        else:
+            yy += 2000
+    tzoffset = None
+    tz = tz.upper()
+    if tz in _timezones:
+        tzoffset = _timezones[tz]
+    else:
+        try:
+            tzoffset = int(tz)
+        except ValueError:
+            pass
+        # -0000 means UTC with the actual source timezone unknown; this is
+        # represented as None (see the docstring above).
+        if tzoffset==0 and tz.startswith('-'):
+            tzoffset = None
+    # Convert a timezone offset into seconds ; -0500 -> -18000
+    if tzoffset:
+        if tzoffset < 0:
+            tzsign = -1
+            tzoffset = -tzoffset
+        else:
+            tzsign = 1
+        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
+    # Daylight Saving Time flag is set to -1, since DST is unknown.
+    return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]
+
+
+def parsedate(data):
+ """Convert a time string to a time tuple."""
+ t = parsedate_tz(data)
+ if isinstance(t, tuple):
+ return t[:9]
+ else:
+ return t
+
+
+def mktime_tz(data):
+ """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp."""
+ if data[9] is None:
+ # No zone info, so localtime is better assumption than GMT
+ return time.mktime(data[:8] + (-1,))
+ else:
+ t = calendar.timegm(data)
+ return t - data[9]
+
+
+def quote(str):
+ """Prepare string to be used in a quoted string.
+
+ Turns backslash and double quote characters into quoted pairs. These
+ are the only characters that need to be quoted inside a quoted string.
+ Does not add the surrounding double quotes.
+ """
+ return str.replace('\\', '\\\\').replace('"', '\\"')
+
+
+class AddrlistClass(object):
+    """Address parser class by Ben Escoto.
+
+    To understand what this class does, it helps to have a copy of RFC 2822 in
+    front of you.
+
+    Note: this class interface is deprecated and may be removed in the future.
+    Use email.utils.AddressList instead.
+    """
+
+    def __init__(self, field):
+        """Initialize a new instance.
+
+        `field' is an unparsed address header field, containing
+        one or more addresses.
+        """
+        self.specials = '()<>@,:;.\"[]'
+        self.pos = 0
+        self.LWS = ' \t'
+        self.CR = '\r\n'
+        self.FWS = self.LWS + self.CR
+        self.atomends = self.specials + self.LWS + self.CR
+        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
+        # is obsolete syntax. RFC 2822 requires that we recognize obsolete
+        # syntax, so allow dots in phrases.
+        self.phraseends = self.atomends.replace('.', '')
+        self.field = field
+        self.commentlist = []
+
+    def gotonext(self):
+        """Skip white space and extract comments.
+
+        Returns the (non-newline) whitespace that was skipped; getaddrspec()
+        uses this to preserve significant whitespace in the local part.
+        """
+        wslist = []
+        while self.pos < len(self.field):
+            if self.field[self.pos] in self.LWS + '\n\r':
+                if self.field[self.pos] not in '\n\r':
+                    wslist.append(self.field[self.pos])
+                self.pos += 1
+            elif self.field[self.pos] == '(':
+                self.commentlist.append(self.getcomment())
+            else:
+                break
+        return EMPTYSTRING.join(wslist)
+
+    def getaddrlist(self):
+        """Parse all addresses.
+
+        Returns a list containing all of the addresses.
+        """
+        result = []
+        while self.pos < len(self.field):
+            ad = self.getaddress()
+            if ad:
+                result += ad
+            else:
+                # Nothing parseable here; record an empty placeholder pair.
+                result.append(('', ''))
+        return result
+
+    def getaddress(self):
+        """Parse the next address."""
+        self.commentlist = []
+        self.gotonext()
+
+        # Remember the starting state so the addrspec branch below can
+        # re-parse from the beginning.
+        oldpos = self.pos
+        oldcl = self.commentlist
+        plist = self.getphraselist()
+
+        self.gotonext()
+        returnlist = []
+
+        if self.pos >= len(self.field):
+            # Bad email address technically, no domain.
+            if plist:
+                returnlist = [(SPACE.join(self.commentlist), plist[0])]
+
+        elif self.field[self.pos] in '.@':
+            # email address is just an addrspec
+            # this isn't very efficient since we start over
+            self.pos = oldpos
+            self.commentlist = oldcl
+            addrspec = self.getaddrspec()
+            returnlist = [(SPACE.join(self.commentlist), addrspec)]
+
+        elif self.field[self.pos] == ':':
+            # address is a group
+            returnlist = []
+
+            fieldlen = len(self.field)
+            self.pos += 1
+            while self.pos < len(self.field):
+                self.gotonext()
+                if self.pos < fieldlen and self.field[self.pos] == ';':
+                    # End of the group; consume the ';' and stop.
+                    self.pos += 1
+                    break
+                returnlist = returnlist + self.getaddress()
+
+        elif self.field[self.pos] == '<':
+            # Address is a phrase then a route addr
+            routeaddr = self.getrouteaddr()
+
+            if self.commentlist:
+                returnlist = [(SPACE.join(plist) + ' (' +
+                               ' '.join(self.commentlist) + ')', routeaddr)]
+            else:
+                returnlist = [(SPACE.join(plist), routeaddr)]
+
+        else:
+            if plist:
+                returnlist = [(SPACE.join(self.commentlist), plist[0])]
+            elif self.field[self.pos] in self.specials:
+                # Skip the stray special character so parsing can progress.
+                self.pos += 1
+
+        self.gotonext()
+        if self.pos < len(self.field) and self.field[self.pos] == ',':
+            self.pos += 1
+        return returnlist
+
+    def getrouteaddr(self):
+        """Parse a route address (Return-path value).
+
+        This method just skips all the route stuff and returns the addrspec.
+        """
+        if self.field[self.pos] != '<':
+            return
+
+        expectroute = False
+        self.pos += 1
+        self.gotonext()
+        adlist = ''
+        while self.pos < len(self.field):
+            if expectroute:
+                # A route domain (after '@') is parsed and discarded.
+                self.getdomain()
+                expectroute = False
+            elif self.field[self.pos] == '>':
+                self.pos += 1
+                break
+            elif self.field[self.pos] == '@':
+                self.pos += 1
+                expectroute = True
+            elif self.field[self.pos] == ':':
+                self.pos += 1
+            else:
+                adlist = self.getaddrspec()
+                self.pos += 1
+                break
+        self.gotonext()
+
+        return adlist
+
+    def getaddrspec(self):
+        """Parse an RFC 2822 addr-spec."""
+        aslist = []
+
+        self.gotonext()
+        while self.pos < len(self.field):
+            preserve_ws = True
+            if self.field[self.pos] == '.':
+                # Whitespace just before a '.' is not significant; drop it.
+                if aslist and not aslist[-1].strip():
+                    aslist.pop()
+                aslist.append('.')
+                self.pos += 1
+                preserve_ws = False
+            elif self.field[self.pos] == '"':
+                aslist.append('"%s"' % quote(self.getquote()))
+            elif self.field[self.pos] in self.atomends:
+                # Trailing whitespace before the end of the local part is
+                # dropped as well.
+                if aslist and not aslist[-1].strip():
+                    aslist.pop()
+                break
+            else:
+                aslist.append(self.getatom())
+            ws = self.gotonext()
+            if preserve_ws and ws:
+                aslist.append(ws)
+
+        if self.pos >= len(self.field) or self.field[self.pos] != '@':
+            # No domain part; return just the local part.
+            return EMPTYSTRING.join(aslist)
+
+        aslist.append('@')
+        self.pos += 1
+        self.gotonext()
+        return EMPTYSTRING.join(aslist) + self.getdomain()
+
+    def getdomain(self):
+        """Get the complete domain name from an address."""
+        sdlist = []
+        while self.pos < len(self.field):
+            if self.field[self.pos] in self.LWS:
+                self.pos += 1
+            elif self.field[self.pos] == '(':
+                self.commentlist.append(self.getcomment())
+            elif self.field[self.pos] == '[':
+                sdlist.append(self.getdomainliteral())
+            elif self.field[self.pos] == '.':
+                self.pos += 1
+                sdlist.append('.')
+            elif self.field[self.pos] in self.atomends:
+                break
+            else:
+                sdlist.append(self.getatom())
+        return EMPTYSTRING.join(sdlist)
+
+    def getdelimited(self, beginchar, endchars, allowcomments=True):
+        """Parse a header fragment delimited by special characters.
+
+        `beginchar' is the start character for the fragment.
+        If self is not looking at an instance of `beginchar' then
+        getdelimited returns the empty string.
+
+        `endchars' is a sequence of allowable end-delimiting characters.
+        Parsing stops when one of these is encountered.
+
+        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
+        within the parsed fragment.
+        """
+        if self.field[self.pos] != beginchar:
+            return ''
+
+        slist = ['']
+        # NOTE: this local 'quote' is a backslash-escape flag; it shadows the
+        # module-level quote() function within this method.
+        quote = False
+        self.pos += 1
+        while self.pos < len(self.field):
+            if quote:
+                slist.append(self.field[self.pos])
+                quote = False
+            elif self.field[self.pos] in endchars:
+                self.pos += 1
+                break
+            elif allowcomments and self.field[self.pos] == '(':
+                slist.append(self.getcomment())
+                continue        # have already advanced pos from getcomment
+            elif self.field[self.pos] == '\\':
+                quote = True
+            else:
+                slist.append(self.field[self.pos])
+            self.pos += 1
+
+        return EMPTYSTRING.join(slist)
+
+    def getquote(self):
+        """Get a quote-delimited fragment from self's field."""
+        return self.getdelimited('"', '"\r', False)
+
+    def getcomment(self):
+        """Get a parenthesis-delimited fragment from self's field."""
+        return self.getdelimited('(', ')\r', True)
+
+    def getdomainliteral(self):
+        """Parse an RFC 2822 domain-literal."""
+        return '[%s]' % self.getdelimited('[', ']\r', False)
+
+    def getatom(self, atomends=None):
+        """Parse an RFC 2822 atom.
+
+        Optional atomends specifies a different set of end token delimiters
+        (the default is to use self.atomends). This is used e.g. in
+        getphraselist() since phrase endings must not include the `.' (which
+        is legal in phrases)."""
+        atomlist = ['']
+        if atomends is None:
+            atomends = self.atomends
+
+        while self.pos < len(self.field):
+            if self.field[self.pos] in atomends:
+                break
+            else:
+                atomlist.append(self.field[self.pos])
+            self.pos += 1
+
+        return EMPTYSTRING.join(atomlist)
+
+    def getphraselist(self):
+        """Parse a sequence of RFC 2822 phrases.
+
+        A phrase is a sequence of words, which are in turn either RFC 2822
+        atoms or quoted-strings. Phrases are canonicalized by squeezing all
+        runs of continuous whitespace into one space.
+        """
+        plist = []
+
+        while self.pos < len(self.field):
+            if self.field[self.pos] in self.FWS:
+                self.pos += 1
+            elif self.field[self.pos] == '"':
+                plist.append(self.getquote())
+            elif self.field[self.pos] == '(':
+                self.commentlist.append(self.getcomment())
+            elif self.field[self.pos] in self.phraseends:
+                break
+            else:
+                plist.append(self.getatom(self.phraseends))
+
+        return plist
+
+class AddressList(AddrlistClass):
+ """An AddressList encapsulates a list of parsed RFC 2822 addresses."""
+ def __init__(self, field):
+ AddrlistClass.__init__(self, field)
+ if field:
+ self.addresslist = self.getaddrlist()
+ else:
+ self.addresslist = []
+
+ def __len__(self):
+ return len(self.addresslist)
+
+ def __add__(self, other):
+ # Set union
+ newaddr = AddressList(None)
+ newaddr.addresslist = self.addresslist[:]
+ for x in other.addresslist:
+ if not x in self.addresslist:
+ newaddr.addresslist.append(x)
+ return newaddr
+
+ def __iadd__(self, other):
+ # Set union, in-place
+ for x in other.addresslist:
+ if not x in self.addresslist:
+ self.addresslist.append(x)
+ return self
+
+ def __sub__(self, other):
+ # Set difference
+ newaddr = AddressList(None)
+ for x in self.addresslist:
+ if not x in other.addresslist:
+ newaddr.addresslist.append(x)
+ return newaddr
+
+ def __isub__(self, other):
+ # Set difference, in-place
+ for x in other.addresslist:
+ if x in self.addresslist:
+ self.addresslist.remove(x)
+ return self
+
+ def __getitem__(self, index):
+ # Make indexing, slices, and 'in' work
+ return self.addresslist[index]
diff --git a/src/future/backports/email/_policybase.py b/src/future/backports/email/_policybase.py
new file mode 100644
index 00000000..c66aea90
--- /dev/null
+++ b/src/future/backports/email/_policybase.py
@@ -0,0 +1,365 @@
+"""Policy framework for the email package.
+
+Allows fine grained feature control of how the package parses and emits data.
+"""
+from __future__ import unicode_literals
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import super
+from future.builtins import str
+from future.utils import with_metaclass
+
+import abc
+from future.backports.email import header
+from future.backports.email import charset as _charset
+from future.backports.email.utils import _has_surrogates
+
+__all__ = [
+ 'Policy',
+ 'Compat32',
+ 'compat32',
+ ]
+
+
+class _PolicyBase(object):
+
+    """Policy Object basic framework.
+
+    This class is useless unless subclassed. A subclass should define
+    class attributes with defaults for any values that are to be
+    managed by the Policy object. The constructor will then allow
+    non-default values to be set for these attributes at instance
+    creation time. The instance will be callable, taking these same
+    attributes keyword arguments, and returning a new instance
+    identical to the called instance except for those values changed
+    by the keyword arguments. Instances may be added, yielding new
+    instances with any non-default values from the right hand
+    operand overriding those in the left hand operand. That is,
+
+        A + B == A(<non-default values of B>)
+
+    The repr of an instance can be used to reconstruct the object
+    if and only if the repr of the values can be used to reconstruct
+    those values.
+
+    """
+
+    def __init__(self, **kw):
+        """Create new Policy, possibly overriding some defaults.
+
+        See class docstring for a list of overridable attributes.
+
+        """
+        for name, value in kw.items():
+            if hasattr(self, name):
+                # Use the base class's __setattr__: ours (below) makes
+                # instances read-only after construction.
+                super(_PolicyBase,self).__setattr__(name, value)
+            else:
+                raise TypeError(
+                    "{!r} is an invalid keyword argument for {}".format(
+                        name, self.__class__.__name__))
+
+    def __repr__(self):
+        args = [ "{}={!r}".format(name, value)
+                 for name, value in self.__dict__.items() ]
+        return "{}({})".format(self.__class__.__name__, ', '.join(args))
+
+    def clone(self, **kw):
+        """Return a new instance with specified attributes changed.
+
+        The new instance has the same attribute values as the current object,
+        except for the changes passed in as keyword arguments.
+
+        """
+        newpolicy = self.__class__.__new__(self.__class__)
+        for attr, value in self.__dict__.items():
+            # object.__setattr__ bypasses the read-only __setattr__ below.
+            object.__setattr__(newpolicy, attr, value)
+        for attr, value in kw.items():
+            if not hasattr(self, attr):
+                raise TypeError(
+                    "{!r} is an invalid keyword argument for {}".format(
+                        attr, self.__class__.__name__))
+            object.__setattr__(newpolicy, attr, value)
+        return newpolicy
+
+    def __setattr__(self, name, value):
+        # Policies are immutable: any attempt to rebind an attribute after
+        # construction raises AttributeError.
+        if hasattr(self, name):
+            msg = "{!r} object attribute {!r} is read-only"
+        else:
+            msg = "{!r} object has no attribute {!r}"
+        raise AttributeError(msg.format(self.__class__.__name__, name))
+
+    def __add__(self, other):
+        """Non-default values from right operand override those from left.
+
+        The object returned is a new instance of the subclass.
+
+        """
+        return self.clone(**other.__dict__)
+
+
+def _append_doc(doc, added_doc):
+ doc = doc.rsplit('\n', 1)[0]
+ added_doc = added_doc.split('\n', 1)[1]
+ return doc + '\n' + added_doc
+
+def _extend_docstrings(cls):
+    # A docstring that starts with '+' means "append this to the docstring
+    # inherited from the base class" (see _append_doc).
+    if cls.__doc__ and cls.__doc__.startswith('+'):
+        cls.__doc__ = _append_doc(cls.__bases__[0].__doc__, cls.__doc__)
+    for name, attr in cls.__dict__.items():
+        if attr.__doc__ and attr.__doc__.startswith('+'):
+            # Walk the bases' MROs and extend from the first one that
+            # provides a docstring for this attribute.
+            for c in (c for base in cls.__bases__ for c in base.mro()):
+                doc = getattr(getattr(c, name), '__doc__')
+                if doc:
+                    attr.__doc__ = _append_doc(doc, attr.__doc__)
+                    break
+    return cls
+
+
+class Policy(with_metaclass(abc.ABCMeta, _PolicyBase)):
+
+ r"""Controls for how messages are interpreted and formatted.
+
+ Most of the classes and many of the methods in the email package accept
+ Policy objects as parameters. A Policy object contains a set of values and
+ functions that control how input is interpreted and how output is rendered.
+ For example, the parameter 'raise_on_defect' controls whether or not an RFC
+ violation results in an error being raised or not, while 'max_line_length'
+ controls the maximum length of output lines when a Message is serialized.
+
+ Any valid attribute may be overridden when a Policy is created by passing
+ it as a keyword argument to the constructor. Policy objects are immutable,
+ but a new Policy object can be created with only certain values changed by
+ calling the Policy instance with keyword arguments. Policy objects can
+ also be added, producing a new Policy object in which the non-default
+ attributes set in the right hand operand overwrite those specified in the
+ left operand.
+
+ Settable attributes:
+
+ raise_on_defect -- If true, then defects should be raised as errors.
+ Default: False.
+
+ linesep -- string containing the value to use as separation
+ between output lines. Default '\n'.
+
+ cte_type -- Type of allowed content transfer encodings
+
+ 7bit -- ASCII only
+ 8bit -- Content-Transfer-Encoding: 8bit is allowed
+
+ Default: 8bit. Also controls the disposition of
+ (RFC invalid) binary data in headers; see the
+ documentation of the binary_fold method.
+
+ max_line_length -- maximum length of lines, excluding 'linesep',
+ during serialization. None or 0 means no line
+ wrapping is done. Default is 78.
+
+ """
+
+ raise_on_defect = False
+ linesep = '\n'
+ cte_type = '8bit'
+ max_line_length = 78
+
+ def handle_defect(self, obj, defect):
+ """Based on policy, either raise defect or call register_defect.
+
+ handle_defect(obj, defect)
+
+ defect should be a Defect subclass, but in any case must be an
+ Exception subclass. obj is the object on which the defect should be
+ registered if it is not raised. If the raise_on_defect is True, the
+ defect is raised as an error, otherwise the object and the defect are
+ passed to register_defect.
+
+ This method is intended to be called by parsers that discover defects.
+ The email package parsers always call it with Defect instances.
+
+ """
+ if self.raise_on_defect:
+ raise defect
+ self.register_defect(obj, defect)
+
+ def register_defect(self, obj, defect):
+ """Record 'defect' on 'obj'.
+
+ Called by handle_defect if raise_on_defect is False. This method is
+ part of the Policy API so that Policy subclasses can implement custom
+ defect handling. The default implementation calls the append method of
+ the defects attribute of obj. The objects used by the email package by
+ default that get passed to this method will always have a defects
+ attribute with an append method.
+
+ """
+ obj.defects.append(defect)
+
+ def header_max_count(self, name):
+ """Return the maximum allowed number of headers named 'name'.
+
+ Called when a header is added to a Message object. If the returned
+ value is not 0 or None, and there are already a number of headers with
+ the name 'name' equal to the value returned, a ValueError is raised.
+
+ Because the default behavior of Message's __setitem__ is to append the
+ value to the list of headers, it is easy to create duplicate headers
+ without realizing it. This method allows certain headers to be limited
+ in the number of instances of that header that may be added to a
+ Message programmatically. (The limit is not observed by the parser,
+ which will faithfully produce as many headers as exist in the message
+ being parsed.)
+
+ The default implementation returns None for all header names.
+ """
+ return None
+
+ @abc.abstractmethod
+ def header_source_parse(self, sourcelines):
+ """Given a list of linesep terminated strings constituting the lines of
+ a single header, return the (name, value) tuple that should be stored
+ in the model. The input lines should retain their terminating linesep
+ characters. The lines passed in by the email package may contain
+ surrogateescaped binary data.
+ """
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def header_store_parse(self, name, value):
+ """Given the header name and the value provided by the application
+ program, return the (name, value) that should be stored in the model.
+ """
+ raise NotImplementedError
+
@abc.abstractmethod
def header_fetch_parse(self, name, value):
    """Return the value handed back to the application for header *name*.

    The stored *value* may contain surrogateescaped binary data when the
    message was parsed by a BytesParser; the returned value must not
    contain any surrogateescaped data.
    """
    raise NotImplementedError
+
@abc.abstractmethod
def fold(self, name, value):
    """Return the folded header as a string, linesep characters included.

    Folding follows this policy's controls.  *value* may contain
    surrogateescaped binary data; the returned string must not.
    """
    raise NotImplementedError
+
@abc.abstractmethod
def fold_binary(self, name, value):
    """Return the folded header as binary data, linesep characters included.

    Like fold(), but the result is bytes and may therefore carry the
    original surrogateescaped 8-bit data unchanged.
    """
    raise NotImplementedError
+
+
@_extend_docstrings
class Compat32(Policy):

    """+
    This particular policy is the backward compatibility Policy. It
    replicates the behavior of the email package version 5.1.
    """
    # NOTE(review): the leading '+' in this class's docstrings appears to be a
    # marker for the _extend_docstrings decorator (presumably splicing in the
    # docstring of the overridden Policy method) -- do not remove it.

    def _sanitize_header(self, name, value):
        # If the header value contains surrogates, return a Header using
        # the unknown-8bit charset to encode the bytes as encoded words.
        if not isinstance(value, str):
            # Assume it is already a header object
            return value
        if _has_surrogates(value):
            return header.Header(value, charset=_charset.UNKNOWN8BIT,
                                 header_name=name)
        else:
            return value

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.

        """
        name, value = sourcelines[0].split(':', 1)
        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name and value are returned unmodified.
        """
        return (name, value)

    def header_fetch_parse(self, name, value):
        """+
        If the value contains binary data, it is converted into a Header object
        using the unknown-8bit charset. Otherwise it is returned unmodified.
        """
        return self._sanitize_header(name, value)

    def fold(self, name, value):
        """+
        Headers are folded using the Header folding algorithm, which preserves
        existing line breaks in the value, and wraps each resulting line to the
        max_line_length. Non-ASCII binary data are CTE encoded using the
        unknown-8bit charset.

        """
        return self._fold(name, value, sanitize=True)

    def fold_binary(self, name, value):
        """+
        Headers are folded using the Header folding algorithm, which preserves
        existing line breaks in the value, and wraps each resulting line to the
        max_line_length. If cte_type is 7bit, non-ascii binary data is CTE
        encoded using the unknown-8bit charset. Otherwise the original source
        header is used, with its existing line breaks and/or binary data.

        """
        folded = self._fold(name, value, sanitize=self.cte_type=='7bit')
        return folded.encode('ascii', 'surrogateescape')

    def _fold(self, name, value, sanitize):
        # Render "Name: value" folded per the policy; returns a str that ends
        # with self.linesep.  'sanitize' controls whether surrogate-escaped
        # (raw 8-bit) data is re-encoded via the unknown-8bit charset.
        parts = []
        parts.append('%s: ' % name)
        if isinstance(value, str):
            if _has_surrogates(value):
                if sanitize:
                    h = header.Header(value,
                                      charset=_charset.UNKNOWN8BIT,
                                      header_name=name)
                else:
                    # If we have raw 8bit data in a byte string, we have no idea
                    # what the encoding is. There is no safe way to split this
                    # string. If it's ascii-subset, then we could do a normal
                    # ascii split, but if it's multibyte then we could break the
                    # string. There's no way to know so the least harm seems to
                    # be to not split the string and risk it being too long.
                    parts.append(value)
                    # h is None signals below that the value was emitted as-is.
                    h = None
            else:
                h = header.Header(value, header_name=name)
        else:
            # Assume it is a Header-like object.
            h = value
        if h is not None:
            parts.append(h.encode(linesep=self.linesep,
                                  maxlinelen=self.max_line_length))
        parts.append(self.linesep)
        return ''.join(parts)
+
+
+compat32 = Compat32()
diff --git a/src/future/backports/email/base64mime.py b/src/future/backports/email/base64mime.py
new file mode 100644
index 00000000..296392a6
--- /dev/null
+++ b/src/future/backports/email/base64mime.py
@@ -0,0 +1,121 @@
+# Copyright (C) 2002-2007 Python Software Foundation
+# Author: Ben Gertzfield
+# Contact: email-sig@python.org
+
+"""Base64 content transfer encoding per RFCs 2045-2047.
+
+This module handles the content transfer encoding method defined in RFC 2045
+to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
+characters encoding known as Base64.
+
+It is used in the MIME standards for email to attach images, audio, and text
+using some 8-bit character sets to messages.
+
+This module provides an interface to encode and decode both headers and bodies
+with Base64 encoding.
+
+RFC 2045 defines a method for including character set information in an
+`encoded-word' in a header. This method is commonly used for 8-bit real names
+in To:, From:, Cc:, etc. fields, as well as Subject: lines.
+
+This module does not do the line wrapping or end-of-line character conversion
+necessary for proper internationalized headers; it only does dumb encoding and
+decoding. To deal with the various line wrapping issues, use the email.header
+module.
+"""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import range
+from future.builtins import bytes
+from future.builtins import str
+
+__all__ = [
+ 'body_decode',
+ 'body_encode',
+ 'decode',
+ 'decodestring',
+ 'header_encode',
+ 'header_length',
+ ]
+
+
+from base64 import b64encode
+from binascii import b2a_base64, a2b_base64
+
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# See also Charset.py
# MISC_LEN is presumably the fixed overhead of an RFC 2047 encoded word
# ("=?", "?b?" and "?=" add up to 7 characters) -- confirm against
# charset.py's RFC2047_CHROME_LEN, which is also 7.
MISC_LEN = 7
+
+
+# Helpers
def header_length(bytearray):
    """Return the number of characters base64 encoding of *bytearray* needs.

    Base64 emits 4 output characters for every complete or partial group
    of 3 input bytes, so round the byte count up to a multiple of 3 and
    scale by 4/3.
    """
    return (len(bytearray) + 2) // 3 * 4
+
+
def header_encode(header_bytes, charset='iso-8859-1'):
    """Encode one header line as an RFC 2047 base64 encoded word.

    *header_bytes* may be text (encoded to bytes using *charset*, which
    defaults to iso-8859-1) or bytes already in that charset.  Empty
    input yields the empty string; otherwise the result has the form
    '=?charset?b?...?='.
    """
    if not header_bytes:
        return ""
    payload = header_bytes
    if isinstance(payload, str):
        payload = payload.encode(charset)
    return '=?%s?b?%s?=' % (charset, b64encode(payload).decode("ascii"))
+
+
def body_encode(s, maxlinelen=76, eol=NL):
    r"""Base64-encode the byte string *s*, wrapping the output lines.

    Output lines are at most *maxlinelen* characters long (default 76)
    and each ends with *eol*, which defaults to "\n"; pass "\r\n" when
    the result goes directly into an email.  Empty input is returned
    unchanged.
    """
    if not s:
        return s

    # maxlinelen base64 output characters correspond to 3/4 as many
    # unencoded input bytes.
    chunk_size = maxlinelen * 3 // 4
    pieces = []
    for offset in range(0, len(s), chunk_size):
        # b2a_base64 appends '\n'; substitute the requested eol if needed.
        line = b2a_base64(s[offset:offset + chunk_size]).decode("ascii")
        if line.endswith(NL) and eol != NL:
            line = line[:-1] + eol
        pieces.append(line)
    return ''.join(pieces)
+
+
def decode(string):
    """Decode raw base64 data (text or bytes), returning bytes.

    This does not parse a full RFC 2047 encoded word such as
    '=?iso-8859-1?b?...?='; use the email.header machinery for that.
    Empty input yields empty bytes.
    """
    if not string:
        return bytes()
    if isinstance(string, str):
        string = string.encode('raw-unicode-escape')
    return a2b_base64(string)
+
+
+# For convenience and backwards compatibility w/ standard base64 module
+body_decode = decode
+decodestring = decode
diff --git a/src/future/backports/email/charset.py b/src/future/backports/email/charset.py
new file mode 100644
index 00000000..2385ce68
--- /dev/null
+++ b/src/future/backports/email/charset.py
@@ -0,0 +1,409 @@
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import str
+from future.builtins import next
+
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Ben Gertzfield, Barry Warsaw
+# Contact: email-sig@python.org
+
+__all__ = [
+ 'Charset',
+ 'add_alias',
+ 'add_charset',
+ 'add_codec',
+ ]
+
+from functools import partial
+
+from future.backports import email
+from future.backports.email import errors
+from future.backports.email.encoders import encode_7or8bit
+
+
+# Flags for types of header encodings
+QP = 1 # Quoted-Printable
+BASE64 = 2 # Base64
+SHORTEST = 3 # the shorter of QP and base64, but only for headers
+
+# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7
+RFC2047_CHROME_LEN = 7
+
+DEFAULT_CHARSET = 'us-ascii'
+UNKNOWN8BIT = 'unknown-8bit'
+EMPTYSTRING = ''
+
+
+# Defaults
+CHARSETS = {
+ # input header enc body enc output conv
+ 'iso-8859-1': (QP, QP, None),
+ 'iso-8859-2': (QP, QP, None),
+ 'iso-8859-3': (QP, QP, None),
+ 'iso-8859-4': (QP, QP, None),
+ # iso-8859-5 is Cyrillic, and not especially used
+ # iso-8859-6 is Arabic, also not particularly used
+ # iso-8859-7 is Greek, QP will not make it readable
+ # iso-8859-8 is Hebrew, QP will not make it readable
+ 'iso-8859-9': (QP, QP, None),
+ 'iso-8859-10': (QP, QP, None),
+ # iso-8859-11 is Thai, QP will not make it readable
+ 'iso-8859-13': (QP, QP, None),
+ 'iso-8859-14': (QP, QP, None),
+ 'iso-8859-15': (QP, QP, None),
+ 'iso-8859-16': (QP, QP, None),
+ 'windows-1252':(QP, QP, None),
+ 'viscii': (QP, QP, None),
+ 'us-ascii': (None, None, None),
+ 'big5': (BASE64, BASE64, None),
+ 'gb2312': (BASE64, BASE64, None),
+ 'euc-jp': (BASE64, None, 'iso-2022-jp'),
+ 'shift_jis': (BASE64, None, 'iso-2022-jp'),
+ 'iso-2022-jp': (BASE64, None, None),
+ 'koi8-r': (BASE64, BASE64, None),
+ 'utf-8': (SHORTEST, BASE64, 'utf-8'),
+ }
+
+# Aliases for other commonly-used names for character sets. Map
+# them to the real ones used in email.
+ALIASES = {
+ 'latin_1': 'iso-8859-1',
+ 'latin-1': 'iso-8859-1',
+ 'latin_2': 'iso-8859-2',
+ 'latin-2': 'iso-8859-2',
+ 'latin_3': 'iso-8859-3',
+ 'latin-3': 'iso-8859-3',
+ 'latin_4': 'iso-8859-4',
+ 'latin-4': 'iso-8859-4',
+ 'latin_5': 'iso-8859-9',
+ 'latin-5': 'iso-8859-9',
+ 'latin_6': 'iso-8859-10',
+ 'latin-6': 'iso-8859-10',
+ 'latin_7': 'iso-8859-13',
+ 'latin-7': 'iso-8859-13',
+ 'latin_8': 'iso-8859-14',
+ 'latin-8': 'iso-8859-14',
+ 'latin_9': 'iso-8859-15',
+ 'latin-9': 'iso-8859-15',
+ 'latin_10':'iso-8859-16',
+ 'latin-10':'iso-8859-16',
+ 'cp949': 'ks_c_5601-1987',
+ 'euc_jp': 'euc-jp',
+ 'euc_kr': 'euc-kr',
+ 'ascii': 'us-ascii',
+ }
+
+
+# Map charsets to their Unicode codec strings.
+CODEC_MAP = {
+ 'gb2312': 'eucgb2312_cn',
+ 'big5': 'big5_tw',
+ # Hack: We don't want *any* conversion for stuff marked us-ascii, as all
+ # sorts of garbage might be sent to us in the guise of 7-bit us-ascii.
+ # Let that stuff pass through without conversion to/from Unicode.
+ 'us-ascii': None,
+ }
+
+
+# Convenience functions for extending the above mappings
def add_charset(charset, header_enc=None, body_enc=None, output_charset=None):
    """Register a character set's email properties in the global registry.

    *charset* is the canonical name of the character set.  *header_enc*
    and *body_enc* are Charset.QP, Charset.BASE64, Charset.SHORTEST
    (header_enc only) or None for no encoding; the default is no
    encoding.  *output_charset*, when given, is the charset output is
    converted to (via Charset.convert()); by default output stays in the
    input charset.

    Both the input and output charsets must have Unicode codec entries
    in this module's charset-to-codec mapping; use add_codec() to
    register codecs the module does not know about.

    Raises ValueError if body_enc is SHORTEST, which is meaningful only
    for headers.
    """
    if body_enc == SHORTEST:
        raise ValueError('SHORTEST not allowed for body_enc')
    CHARSETS[charset] = (header_enc, body_enc, output_charset)
+
+
def add_alias(alias, canonical):
    """Register *alias* as an alternative spelling of *canonical*.

    e.g. add_alias('latin-1', 'iso-8859-1')
    """
    ALIASES.update({alias: canonical})
+
+
def add_codec(charset, codecname):
    """Register the Python codec used to convert *charset* to/from Unicode.

    *codecname* is any codec name acceptable to str.encode() /
    bytes.decode().
    """
    CODEC_MAP.update({charset: codecname})
+
+
+# Convenience function for encoding strings, taking into account
+# that they might be unknown-8bit (ie: have surrogate-escaped bytes)
def _encode(string, codec):
    """Encode *string* with *codec*, honouring the unknown-8bit pseudo-codec.

    A string tagged unknown-8bit may carry surrogate-escaped raw bytes;
    those are turned back into the original bytes instead of being
    encoded through a real codec.
    """
    text = str(string)
    if codec == UNKNOWN8BIT:
        return text.encode('ascii', 'surrogateescape')
    return text.encode(codec)
+
+
class Charset(object):
    """Map character sets to their email properties.

    This class provides information about the requirements imposed on email
    for a specific character set.  It also provides convenience routines for
    converting between character sets, given the availability of the
    applicable codecs.  Given a character set, it will do its best to provide
    information on how to use that character set in an email in an
    RFC-compliant way.

    Certain character sets must be encoded with quoted-printable or base64
    when used in email headers or bodies.  Certain character sets must be
    converted outright, and are not allowed in email.  Instances of this
    module expose the following information about a character set:

    input_charset: The initial character set specified.  Common aliases
                   are converted to their `official' email names (e.g. latin_1
                   is converted to iso-8859-1).  Defaults to 7-bit us-ascii.

    header_encoding: If the character set must be encoded before it can be
                     used in an email header, this attribute will be set to
                     Charset.QP (for quoted-printable), Charset.BASE64 (for
                     base64 encoding), or Charset.SHORTEST for the shortest of
                     QP or BASE64 encoding.  Otherwise, it will be None.

    body_encoding: Same as header_encoding, but describes the encoding for the
                   mail message's body, which indeed may be different than the
                   header encoding.  Charset.SHORTEST is not allowed for
                   body_encoding.

    output_charset: Some character sets must be converted before they can be
                    used in email headers or bodies.  If the input_charset is
                    one of them, this attribute will contain the name of the
                    charset output will be converted to.  Otherwise, it will
                    be None.

    input_codec: The name of the Python codec used to convert the
                 input_charset to Unicode.  If no conversion codec is
                 necessary, this attribute will be None.

    output_codec: The name of the Python codec used to convert Unicode
                  to the output_charset.  If no conversion codec is necessary,
                  this attribute will have the same value as the input_codec.
    """
    def __init__(self, input_charset=DEFAULT_CHARSET):
        # RFC 2046, $4.1.2 says charsets are not case sensitive. We coerce to
        # unicode because its .lower() is locale insensitive. If the argument
        # is already a unicode, we leave it at that, but ensure that the
        # charset is ASCII, as the standard (RFC XXX) requires.
        try:
            if isinstance(input_charset, str):
                input_charset.encode('ascii')
            else:
                input_charset = str(input_charset, 'ascii')
        except UnicodeError:
            raise errors.CharsetError(input_charset)
        input_charset = input_charset.lower()
        # Set the input charset after filtering through the aliases
        self.input_charset = ALIASES.get(input_charset, input_charset)
        # We can try to guess which encoding and conversion to use by the
        # charset_map dictionary. Try that first, but let the user override
        # it.  Unknown charsets get the most conservative defaults.
        henc, benc, conv = CHARSETS.get(self.input_charset,
                                        (SHORTEST, BASE64, None))
        if not conv:
            conv = self.input_charset
        # Set the attributes, allowing the arguments to override the default.
        self.header_encoding = henc
        self.body_encoding = benc
        self.output_charset = ALIASES.get(conv, conv)
        # Now set the codecs. If one isn't defined for input_charset,
        # guess and try a Unicode codec with the same name as input_codec.
        self.input_codec = CODEC_MAP.get(self.input_charset,
                                         self.input_charset)
        self.output_codec = CODEC_MAP.get(self.output_charset,
                                          self.output_charset)

    def __str__(self):
        return self.input_charset.lower()

    __repr__ = __str__

    # NOTE(review): defining __eq__ without __hash__ makes Charset instances
    # unhashable on Python 3 -- confirm no caller uses them as dict/set keys.
    def __eq__(self, other):
        return str(self) == str(other).lower()

    def __ne__(self, other):
        return not self.__eq__(other)

    def get_body_encoding(self):
        """Return the content-transfer-encoding used for body encoding.

        This is either the string `quoted-printable' or `base64' depending on
        the encoding used, or it is a function in which case you should call
        the function with a single argument, the Message object being
        encoded.  The function should then set the Content-Transfer-Encoding
        header itself to whatever is appropriate.

        Returns "quoted-printable" if self.body_encoding is QP.
        Returns "base64" if self.body_encoding is BASE64.
        Returns conversion function otherwise.
        """
        assert self.body_encoding != SHORTEST
        if self.body_encoding == QP:
            return 'quoted-printable'
        elif self.body_encoding == BASE64:
            return 'base64'
        else:
            return encode_7or8bit

    def get_output_charset(self):
        """Return the output character set.

        This is self.output_charset if that is not None, otherwise it is
        self.input_charset.
        """
        return self.output_charset or self.input_charset

    def header_encode(self, string):
        """Header-encode a string by converting it first to bytes.

        The type of encoding (base64 or quoted-printable) will be based on
        this charset's `header_encoding`.

        :param string: A unicode string for the header.  It must be possible
            to encode this string to bytes using the character set's
            output codec.
        :return: The encoded string, with RFC 2047 chrome.
        """
        codec = self.output_codec or 'us-ascii'
        header_bytes = _encode(string, codec)
        # 7bit/8bit encodings return the string unchanged (modulo conversions)
        encoder_module = self._get_encoder(header_bytes)
        if encoder_module is None:
            return string
        return encoder_module.header_encode(header_bytes, codec)

    def header_encode_lines(self, string, maxlengths):
        """Header-encode a string by converting it first to bytes.

        This is similar to `header_encode()` except that the string is fit
        into maximum line lengths as given by the argument.

        :param string: A unicode string for the header.  It must be possible
            to encode this string to bytes using the character set's
            output codec.
        :param maxlengths: Maximum line length iterator.  Each element
            returned from this iterator will provide the next maximum line
            length.  This parameter is used as an argument to built-in next()
            and should never be exhausted.  The maximum line lengths should
            not count the RFC 2047 chrome.  These line lengths are only a
            hint; the splitter does the best it can.
        :return: Lines of encoded strings, each with RFC 2047 chrome.
        """
        # See which encoding we should use.
        codec = self.output_codec or 'us-ascii'
        header_bytes = _encode(string, codec)
        encoder_module = self._get_encoder(header_bytes)
        encoder = partial(encoder_module.header_encode, charset=codec)
        # Calculate the number of characters that the RFC 2047 chrome will
        # contribute to each line.
        charset = self.get_output_charset()
        extra = len(charset) + RFC2047_CHROME_LEN
        # Now comes the hard part. We must encode bytes but we can't split on
        # bytes because some character sets are variable length and each
        # encoded word must stand on its own. So the problem is you have to
        # encode to bytes to figure out this word's length, but you must split
        # on characters. This causes two problems: first, we don't know how
        # many octets a specific substring of unicode characters will get
        # encoded to, and second, we don't know how many ASCII characters
        # those octets will get encoded to. Unless we try it. Which seems
        # inefficient. In the interest of being correct rather than fast (and
        # in the hope that there will be few encoded headers in any such
        # message), brute force it. :(
        lines = []
        current_line = []
        maxlen = next(maxlengths) - extra
        for character in string:
            current_line.append(character)
            this_line = EMPTYSTRING.join(current_line)
            length = encoder_module.header_length(_encode(this_line, charset))
            if length > maxlen:
                # This last character doesn't fit so pop it off.
                current_line.pop()
                # Does nothing fit on the first line?
                if not lines and not current_line:
                    # Presumably the None entry signals the caller that the
                    # first line stays empty -- verify against the caller.
                    lines.append(None)
                else:
                    # NOTE(review): 'separator' is computed but never used.
                    separator = (' ' if lines else '')
                    joined_line = EMPTYSTRING.join(current_line)
                    header_bytes = _encode(joined_line, codec)
                    lines.append(encoder(header_bytes))
                current_line = [character]
                maxlen = next(maxlengths) - extra
        joined_line = EMPTYSTRING.join(current_line)
        header_bytes = _encode(joined_line, codec)
        lines.append(encoder(header_bytes))
        return lines

    def _get_encoder(self, header_bytes):
        # Pick the encoder module implied by header_encoding; for SHORTEST,
        # measure both encodings of this particular payload and keep the
        # smaller (ties go to quoted-printable).
        if self.header_encoding == BASE64:
            return email.base64mime
        elif self.header_encoding == QP:
            return email.quoprimime
        elif self.header_encoding == SHORTEST:
            len64 = email.base64mime.header_length(header_bytes)
            lenqp = email.quoprimime.header_length(header_bytes)
            if len64 < lenqp:
                return email.base64mime
            else:
                return email.quoprimime
        else:
            return None

    def body_encode(self, string):
        """Body-encode a string by converting it first to bytes.

        The type of encoding (base64 or quoted-printable) will be based on
        self.body_encoding.  If body_encoding is None, we assume the
        output charset is a 7bit encoding, so re-encoding the decoded
        string using the ascii codec produces the correct string version
        of the content.
        """
        if not string:
            return string
        # NOTE(review): 'is' comparison with the small-int flags relies on
        # CPython's small-integer caching; '==' would be safer.
        if self.body_encoding is BASE64:
            if isinstance(string, str):
                string = string.encode(self.output_charset)
            return email.base64mime.body_encode(string)
        elif self.body_encoding is QP:
            # quoprimime.body_encode takes a string, but operates on it as if
            # it were a list of byte codes. For a (minimal) history on why
            # this is so, see changeset 0cf700464177. To correctly encode a
            # character set, then, we must turn it into pseudo bytes via the
            # latin1 charset, which will encode any byte as a single code point
            # between 0 and 255, which is what body_encode is expecting.
            if isinstance(string, str):
                string = string.encode(self.output_charset)
            string = string.decode('latin1')
            return email.quoprimime.body_encode(string)
        else:
            if isinstance(string, str):
                string = string.encode(self.output_charset).decode('ascii')
            return string
diff --git a/src/future/backports/email/encoders.py b/src/future/backports/email/encoders.py
new file mode 100644
index 00000000..15d2eb46
--- /dev/null
+++ b/src/future/backports/email/encoders.py
@@ -0,0 +1,90 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Encodings and related functions."""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import str
+
+__all__ = [
+ 'encode_7or8bit',
+ 'encode_base64',
+ 'encode_noop',
+ 'encode_quopri',
+ ]
+
+
+try:
+ from base64 import encodebytes as _bencode
+except ImportError:
+ # Py2 compatibility. TODO: test this!
+ from base64 import encodestring as _bencode
+from quopri import encodestring as _encodestring
+
+
+def _qencode(s):
+ enc = _encodestring(s, quotetabs=True)
+ # Must encode spaces, which quopri.encodestring() doesn't do
+ return enc.replace(' ', '=20')
+
+
def encode_base64(msg):
    """Base64-encode msg's payload in place.

    The payload is replaced by its base64 encoding (an ascii str) and a
    matching Content-Transfer-Encoding header is added.
    """
    payload = msg.get_payload()
    msg.set_payload(_bencode(payload).decode('ascii'))
    msg['Content-Transfer-Encoding'] = 'base64'
+
+
def encode_quopri(msg):
    """Quoted-printable-encode msg's payload in place.

    The payload is replaced by its quoted-printable encoding and a
    matching Content-Transfer-Encoding header is added.
    """
    payload = msg.get_payload()
    msg.set_payload(_qencode(payload))
    msg['Content-Transfer-Encoding'] = 'quoted-printable'
+
+
def encode_7or8bit(msg):
    """Set the Content-Transfer-Encoding header to 7bit or 8bit.

    The payload itself is left alone except that a pure-ASCII bytes
    payload is normalised to str via surrogateescape decoding.
    """
    payload = msg.get_payload()
    if payload is None:
        # There's no payload. For backwards compatibility we use 7bit
        msg['Content-Transfer-Encoding'] = '7bit'
        return
    # Cheap probe: if the data round-trips through ASCII it must be 7bit.
    try:
        if isinstance(payload, str):
            payload.encode('ascii')
        else:
            payload.decode('ascii')
    except UnicodeError:
        charset = msg.get_charset()
        output_cset = charset and charset.output_charset
        # iso-2022-* carries non-ASCII text in a 7-bit wire encoding.
        if output_cset and output_cset.lower().startswith('iso-2022-'):
            msg['Content-Transfer-Encoding'] = '7bit'
        else:
            msg['Content-Transfer-Encoding'] = '8bit'
    else:
        msg['Content-Transfer-Encoding'] = '7bit'
        if not isinstance(payload, str):
            msg.set_payload(payload.decode('ascii', 'surrogateescape'))
+
+
def encode_noop(msg):
    """Do nothing to the encoding, but normalise a bytes payload to str.

    The Python 3 email model stores payloads as str, so bytes payloads
    are re-expressed via the ascii/surrogateescape trick to keep the
    model consistent; str payloads are left untouched.
    """
    payload = msg.get_payload()
    if isinstance(payload, str):
        return
    msg.set_payload(payload.decode('ascii', 'surrogateescape'))
diff --git a/src/future/backports/email/errors.py b/src/future/backports/email/errors.py
new file mode 100644
index 00000000..0fe599cf
--- /dev/null
+++ b/src/future/backports/email/errors.py
@@ -0,0 +1,111 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""email package exception classes."""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import super
+
+
# "Hard" errors: unlike the MessageDefect classes further down in this
# module (which record problems the parser worked around), these are
# raised to the caller.

class MessageError(Exception):
    """Base class for errors in the email package."""


class MessageParseError(MessageError):
    """Base class for message parsing errors."""


class HeaderParseError(MessageParseError):
    """Error while parsing headers."""


class BoundaryError(MessageParseError):
    """Couldn't find terminating boundary."""


class MultipartConversionError(MessageError, TypeError):
    """Conversion to a multipart is prohibited."""


class CharsetError(MessageError):
    """An illegal charset was given."""
+
+
+# These are parsing defects which the parser was able to work around.
class MessageDefect(ValueError):
    """Base class for a message defect."""

    def __init__(self, line=None):
        # Record the offending source line; only forward it to ValueError
        # when one was given so an argument-less defect still str()s to ''.
        self.line = line
        if line is not None:
            super().__init__(line)
+
class NoBoundaryInMultipartDefect(MessageDefect):
    """A message claimed to be a multipart but had no boundary parameter."""

class StartBoundaryNotFoundDefect(MessageDefect):
    """The claimed start boundary was never found."""

class CloseBoundaryNotFoundDefect(MessageDefect):
    """A start boundary was found, but not the corresponding close boundary."""

class FirstHeaderLineIsContinuationDefect(MessageDefect):
    """A message had a continuation line as its first header line."""

class MisplacedEnvelopeHeaderDefect(MessageDefect):
    """A 'Unix-from' header was found in the middle of a header block."""

class MissingHeaderBodySeparatorDefect(MessageDefect):
    """Found line with no leading whitespace and no colon before blank line."""
# XXX: backward compatibility, just in case (it was never emitted).
MalformedHeaderDefect = MissingHeaderBodySeparatorDefect

class MultipartInvariantViolationDefect(MessageDefect):
    """A message claimed to be a multipart but no subparts were found."""

class InvalidMultipartContentTransferEncodingDefect(MessageDefect):
    """An invalid content transfer encoding was set on the multipart itself."""

class UndecodableBytesDefect(MessageDefect):
    """Header contained bytes that could not be decoded."""

class InvalidBase64PaddingDefect(MessageDefect):
    """base64 encoded sequence had an incorrect length."""

class InvalidBase64CharactersDefect(MessageDefect):
    """base64 encoded sequence had characters not in base64 alphabet."""
+
# These errors are specific to header parsing.

class HeaderDefect(MessageDefect):
    """Base class for a header defect."""

    def __init__(self, *args, **kw):
        # Adds no behavior yet; exists so all header defects share a root.
        super().__init__(*args, **kw)

class InvalidHeaderDefect(HeaderDefect):
    """Header is not valid, message gives details."""

class HeaderMissingRequiredValue(HeaderDefect):
    """A header that must have a value had none."""
+
class NonPrintableDefect(HeaderDefect):
    """ASCII characters outside the ascii-printable range found"""

    def __init__(self, non_printables):
        # Keep the offending characters around for the message below.
        self.non_printables = non_printables
        super().__init__(non_printables)

    def __str__(self):
        return ("the following ASCII non-printables found in header: "
                "{}".format(self.non_printables))
+
class ObsoleteHeaderDefect(HeaderDefect):
    """Header uses syntax declared obsolete by RFC 5322."""

class NonASCIILocalPartDefect(HeaderDefect):
    """local_part contains non-ASCII characters."""
    # This defect only occurs during unicode parsing, not when
    # parsing messages decoded from binary.
diff --git a/src/future/backports/email/feedparser.py b/src/future/backports/email/feedparser.py
new file mode 100644
index 00000000..935c26e3
--- /dev/null
+++ b/src/future/backports/email/feedparser.py
@@ -0,0 +1,525 @@
+# Copyright (C) 2004-2006 Python Software Foundation
+# Authors: Baxter, Wouters and Warsaw
+# Contact: email-sig@python.org
+
+"""FeedParser - An email feed parser.
+
+The feed parser implements an interface for incrementally parsing an email
+message, line by line. This has advantages for certain applications, such as
+those reading email messages off a socket.
+
+FeedParser.feed() is the primary interface for pushing new data into the
+parser. It returns when there's nothing more it can do with the available
+data. When you have no more data to push into the parser, call .close().
+This completes the parsing and returns the root message object.
+
+The other advantage of this parser is that it will never raise a parsing
+exception. Instead, when it finds something unexpected, it adds a 'defect' to
+the current message. Defects are just instances that live on the message
+object's .defects attribute.
+"""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import object, range, super
+from future.utils import implements_iterator, PY3
+
+__all__ = ['FeedParser', 'BytesFeedParser']
+
+import re
+
+from future.backports.email import errors
+from future.backports.email import message
+from future.backports.email._policybase import compat32
+
+NLCRE = re.compile('\r\n|\r|\n')
+NLCRE_bol = re.compile('(\r\n|\r|\n)')
+NLCRE_eol = re.compile('(\r\n|\r|\n)\Z')
+NLCRE_crack = re.compile('(\r\n|\r|\n)')
+# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character
+# except controls, SP, and ":".
+headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])')
+EMPTYSTRING = ''
+NL = '\n'
+
+NeedMoreData = object()
+
+
+# @implements_iterator
+class BufferedSubFile(object):
+    """A file-ish object that can have new data loaded into it.
+
+    You can also push and pop line-matching predicates onto a stack. When the
+    current predicate matches the current line, a false EOF response
+    (i.e. empty string) is returned instead. This lets the parser adhere to a
+    simple abstraction -- it parses until EOF closes the current message.
+    """
+    def __init__(self):
+        # The last partial line pushed into this object.
+        self._partial = ''
+        # The list of full, pushed lines, in reverse order
+        self._lines = []
+        # The stack of false-EOF checking predicates.
+        self._eofstack = []
+        # A flag indicating whether the file has been closed or not.
+        self._closed = False
+
+    def push_eof_matcher(self, pred):
+        # pred is called with each line; a true result simulates EOF.
+        self._eofstack.append(pred)
+
+    def pop_eof_matcher(self):
+        return self._eofstack.pop()
+
+    def close(self):
+        # Don't forget any trailing partial line.
+        self._lines.append(self._partial)
+        self._partial = ''
+        self._closed = True
+
+    def readline(self):
+        if not self._lines:
+            if self._closed:
+                return ''
+            # Not closed yet: return (not raise) the sentinel so generator
+            # callers can yield it up the stack and resume later.
+            return NeedMoreData
+        # Pop the line off the stack and see if it matches the current
+        # false-EOF predicate.
+        line = self._lines.pop()
+        # RFC 2046, section 5.1.2 requires us to recognize outer level
+        # boundaries at any level of inner nesting. Do this, but be sure it's
+        # in the order of most to least nested.
+        for ateof in self._eofstack[::-1]:
+            if ateof(line):
+                # We're at the false EOF. But push the last line back first.
+                self._lines.append(line)
+                return ''
+        return line
+
+    def unreadline(self, line):
+        # Let the consumer push a line back into the buffer.
+        assert line is not NeedMoreData
+        self._lines.append(line)
+
+    def push(self, data):
+        """Push some new data into this object."""
+        # Handle any previous leftovers
+        data, self._partial = self._partial + data, ''
+        # Crack into lines, but preserve the newlines on the end of each
+        parts = NLCRE_crack.split(data)
+        # The *ahem* interesting behaviour of re.split when supplied grouping
+        # parentheses is that the last element of the resulting list is the
+        # data after the final RE. In the case of a NL/CR terminated string,
+        # this is the empty string.
+        self._partial = parts.pop()
+        #GAN 29Mar09 bugs 1555570, 1721862 Confusion at 8K boundary ending with \r:
+        # is there a \n to follow later?
+        if not self._partial and parts and parts[-1].endswith('\r'):
+            self._partial = parts.pop(-2)+parts.pop()
+        # parts is a list of strings, alternating between the line contents
+        # and the eol character(s). Gather up a list of lines after
+        # re-attaching the newlines.
+        lines = []
+        for i in range(len(parts) // 2):
+            lines.append(parts[i*2] + parts[i*2+1])
+        self.pushlines(lines)
+
+    def pushlines(self, lines):
+        # Reverse and insert at the front of the lines.
+        self._lines[:0] = lines[::-1]
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        line = self.readline()
+        if line == '':
+            raise StopIteration
+        return line
+
+
+class FeedParser(object):
+ """A feed-style parser of email."""
+
+    def __init__(self, _factory=message.Message, **_3to2kwargs):
+        # Keyword-only 'policy' argument emulated for Python 2 compatibility
+        # (artifact of the 3to2 conversion of the upstream Python 3 code).
+        if 'policy' in _3to2kwargs: policy = _3to2kwargs['policy']; del _3to2kwargs['policy']
+        else: policy = compat32
+        # NOTE(review): the string below is not a real docstring -- it follows
+        # executable code -- but is kept verbatim from the conversion.
+        """_factory is called with no arguments to create a new message obj
+
+        The policy keyword specifies a policy object that controls a number of
+        aspects of the parser's operation. The default policy maintains
+        backward compatibility.
+
+        """
+        self._factory = _factory
+        self.policy = policy
+        try:
+            # Probe whether the factory accepts a 'policy' keyword.
+            _factory(policy=self.policy)
+            self._factory_kwds = lambda: {'policy': self.policy}
+        except TypeError:
+            # Assume this is an old-style factory
+            self._factory_kwds = lambda: {}
+        self._input = BufferedSubFile()
+        self._msgstack = []
+        if PY3:
+            self._parse = self._parsegen().__next__
+        else:
+            self._parse = self._parsegen().next
+        self._cur = None
+        self._last = None
+        self._headersonly = False
+
+    # Non-public interface for supporting Parser's headersonly flag
+    def _set_headersonly(self):
+        self._headersonly = True
+
+    def feed(self, data):
+        """Push more data into the parser."""
+        self._input.push(data)
+        self._call_parse()
+
+    def _call_parse(self):
+        # Advance the parser generator; StopIteration just means it is done.
+        try:
+            self._parse()
+        except StopIteration:
+            pass
+
+    def close(self):
+        """Parse all remaining data and return the root message object."""
+        self._input.close()
+        self._call_parse()
+        root = self._pop_message()
+        assert not self._msgstack
+        # Look for final set of defects
+        if root.get_content_maintype() == 'multipart' \
+                and not root.is_multipart():
+            defect = errors.MultipartInvariantViolationDefect()
+            self.policy.handle_defect(root, defect)
+        return root
+
+    def _new_message(self):
+        # Create a fresh message object and push it onto the parse stack,
+        # attaching it to the current (parent) message if there is one.
+        msg = self._factory(**self._factory_kwds())
+        if self._cur and self._cur.get_content_type() == 'multipart/digest':
+            msg.set_default_type('message/rfc822')
+        if self._msgstack:
+            self._msgstack[-1].attach(msg)
+        self._msgstack.append(msg)
+        self._cur = msg
+        self._last = msg
+
+    def _pop_message(self):
+        retval = self._msgstack.pop()
+        if self._msgstack:
+            self._cur = self._msgstack[-1]
+        else:
+            self._cur = None
+        return retval
+
+ def _parsegen(self):
+ # Create a new message and start by parsing headers.
+ self._new_message()
+ headers = []
+ # Collect the headers, searching for a line that doesn't match the RFC
+ # 2822 header or continuation pattern (including an empty line).
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ if not headerRE.match(line):
+ # If we saw the RFC defined header/body separator
+ # (i.e. newline), just throw it away. Otherwise the line is
+ # part of the body so push it back.
+ if not NLCRE.match(line):
+ defect = errors.MissingHeaderBodySeparatorDefect()
+ self.policy.handle_defect(self._cur, defect)
+ self._input.unreadline(line)
+ break
+ headers.append(line)
+ # Done with the headers, so parse them and figure out what we're
+ # supposed to see in the body of the message.
+ self._parse_headers(headers)
+ # Headers-only parsing is a backwards compatibility hack, which was
+ # necessary in the older parser, which could raise errors. All
+ # remaining lines in the input are thrown into the message body.
+ if self._headersonly:
+ lines = []
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ if line == '':
+ break
+ lines.append(line)
+ self._cur.set_payload(EMPTYSTRING.join(lines))
+ return
+ if self._cur.get_content_type() == 'message/delivery-status':
+ # message/delivery-status contains blocks of headers separated by
+ # a blank line. We'll represent each header block as a separate
+ # nested message object, but the processing is a bit different
+ # than standard message/* types because there is no body for the
+ # nested messages. A blank line separates the subparts.
+ while True:
+ self._input.push_eof_matcher(NLCRE.match)
+ for retval in self._parsegen():
+ if retval is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ msg = self._pop_message()
+ # We need to pop the EOF matcher in order to tell if we're at
+ # the end of the current file, not the end of the last block
+ # of message headers.
+ self._input.pop_eof_matcher()
+ # The input stream must be sitting at the newline or at the
+ # EOF. We want to see if we're at the end of this subpart, so
+ # first consume the blank line, then test the next line to see
+ # if we're at this subpart's EOF.
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ if line == '':
+ break
+ # Not at EOF so this is a line we're going to need.
+ self._input.unreadline(line)
+ return
+ if self._cur.get_content_maintype() == 'message':
+ # The message claims to be a message/* type, then what follows is
+ # another RFC 2822 message.
+ for retval in self._parsegen():
+ if retval is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ self._pop_message()
+ return
+ if self._cur.get_content_maintype() == 'multipart':
+ boundary = self._cur.get_boundary()
+ if boundary is None:
+ # The message /claims/ to be a multipart but it has not
+ # defined a boundary. That's a problem which we'll handle by
+ # reading everything until the EOF and marking the message as
+ # defective.
+ defect = errors.NoBoundaryInMultipartDefect()
+ self.policy.handle_defect(self._cur, defect)
+ lines = []
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ lines.append(line)
+ self._cur.set_payload(EMPTYSTRING.join(lines))
+ return
+ # Make sure a valid content type was specified per RFC 2045:6.4.
+ if (self._cur.get('content-transfer-encoding', '8bit').lower()
+ not in ('7bit', '8bit', 'binary')):
+ defect = errors.InvalidMultipartContentTransferEncodingDefect()
+ self.policy.handle_defect(self._cur, defect)
+ # Create a line match predicate which matches the inter-part
+ # boundary as well as the end-of-multipart boundary. Don't push
+ # this onto the input stream until we've scanned past the
+ # preamble.
+ separator = '--' + boundary
+ boundaryre = re.compile(
+ '(?P' + re.escape(separator) +
+ r')(?P--)?(?P[ \t]*)(?P\r\n|\r|\n)?$')
+ capturing_preamble = True
+ preamble = []
+ linesep = False
+ close_boundary_seen = False
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ if line == '':
+ break
+ mo = boundaryre.match(line)
+ if mo:
+ # If we're looking at the end boundary, we're done with
+ # this multipart. If there was a newline at the end of
+ # the closing boundary, then we need to initialize the
+ # epilogue with the empty string (see below).
+ if mo.group('end'):
+ close_boundary_seen = True
+ linesep = mo.group('linesep')
+ break
+ # We saw an inter-part boundary. Were we in the preamble?
+ if capturing_preamble:
+ if preamble:
+ # According to RFC 2046, the last newline belongs
+ # to the boundary.
+ lastline = preamble[-1]
+ eolmo = NLCRE_eol.search(lastline)
+ if eolmo:
+ preamble[-1] = lastline[:-len(eolmo.group(0))]
+ self._cur.preamble = EMPTYSTRING.join(preamble)
+ capturing_preamble = False
+ self._input.unreadline(line)
+ continue
+ # We saw a boundary separating two parts. Consume any
+ # multiple boundary lines that may be following. Our
+ # interpretation of RFC 2046 BNF grammar does not produce
+ # body parts within such double boundaries.
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ mo = boundaryre.match(line)
+ if not mo:
+ self._input.unreadline(line)
+ break
+ # Recurse to parse this subpart; the input stream points
+ # at the subpart's first line.
+ self._input.push_eof_matcher(boundaryre.match)
+ for retval in self._parsegen():
+ if retval is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ # Because of RFC 2046, the newline preceding the boundary
+ # separator actually belongs to the boundary, not the
+ # previous subpart's payload (or epilogue if the previous
+ # part is a multipart).
+ if self._last.get_content_maintype() == 'multipart':
+ epilogue = self._last.epilogue
+ if epilogue == '':
+ self._last.epilogue = None
+ elif epilogue is not None:
+ mo = NLCRE_eol.search(epilogue)
+ if mo:
+ end = len(mo.group(0))
+ self._last.epilogue = epilogue[:-end]
+ else:
+ payload = self._last._payload
+ if isinstance(payload, str):
+ mo = NLCRE_eol.search(payload)
+ if mo:
+ payload = payload[:-len(mo.group(0))]
+ self._last._payload = payload
+ self._input.pop_eof_matcher()
+ self._pop_message()
+ # Set the multipart up for newline cleansing, which will
+ # happen if we're in a nested multipart.
+ self._last = self._cur
+ else:
+ # I think we must be in the preamble
+ assert capturing_preamble
+ preamble.append(line)
+ # We've seen either the EOF or the end boundary. If we're still
+ # capturing the preamble, we never saw the start boundary. Note
+ # that as a defect and store the captured text as the payload.
+ if capturing_preamble:
+ defect = errors.StartBoundaryNotFoundDefect()
+ self.policy.handle_defect(self._cur, defect)
+ self._cur.set_payload(EMPTYSTRING.join(preamble))
+ epilogue = []
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ self._cur.epilogue = EMPTYSTRING.join(epilogue)
+ return
+ # If we're not processing the preamble, then we might have seen
+ # EOF without seeing that end boundary...that is also a defect.
+ if not close_boundary_seen:
+ defect = errors.CloseBoundaryNotFoundDefect()
+ self.policy.handle_defect(self._cur, defect)
+ return
+ # Everything from here to the EOF is epilogue. If the end boundary
+ # ended in a newline, we'll need to make sure the epilogue isn't
+ # None
+ if linesep:
+ epilogue = ['']
+ else:
+ epilogue = []
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ epilogue.append(line)
+ # Any CRLF at the front of the epilogue is not technically part of
+ # the epilogue. Also, watch out for an empty string epilogue,
+ # which means a single newline.
+ if epilogue:
+ firstline = epilogue[0]
+ bolmo = NLCRE_bol.match(firstline)
+ if bolmo:
+ epilogue[0] = firstline[len(bolmo.group(0)):]
+ self._cur.epilogue = EMPTYSTRING.join(epilogue)
+ return
+ # Otherwise, it's some non-multipart type, so the entire rest of the
+ # file contents becomes the payload.
+ lines = []
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ lines.append(line)
+ self._cur.set_payload(EMPTYSTRING.join(lines))
+
+    def _parse_headers(self, lines):
+        # Passed a list of lines that make up the headers for the current msg
+        lastheader = ''
+        # lastvalue accumulates the raw header line plus any continuations.
+        lastvalue = []
+        for lineno, line in enumerate(lines):
+            # Check for continuation
+            if line[0] in ' \t':
+                if not lastheader:
+                    # The first line of the headers was a continuation. This
+                    # is illegal, so let's note the defect, store the illegal
+                    # line, and ignore it for purposes of headers.
+                    defect = errors.FirstHeaderLineIsContinuationDefect(line)
+                    self.policy.handle_defect(self._cur, defect)
+                    continue
+                lastvalue.append(line)
+                continue
+            if lastheader:
+                self._cur.set_raw(*self.policy.header_source_parse(lastvalue))
+                lastheader, lastvalue = '', []
+            # Check for envelope header, i.e. unix-from
+            if line.startswith('From '):
+                if lineno == 0:
+                    # Strip off the trailing newline
+                    mo = NLCRE_eol.search(line)
+                    if mo:
+                        line = line[:-len(mo.group(0))]
+                    self._cur.set_unixfrom(line)
+                    continue
+                elif lineno == len(lines) - 1:
+                    # Something looking like a unix-from at the end - it's
+                    # probably the first line of the body, so push back the
+                    # line and stop.
+                    self._input.unreadline(line)
+                    return
+                else:
+                    # Weirdly placed unix-from line. Note this as a defect
+                    # and ignore it.
+                    defect = errors.MisplacedEnvelopeHeaderDefect(line)
+                    self._cur.defects.append(defect)
+                    continue
+            # Split the line on the colon separating field name from value.
+            # There will always be a colon, because if there wasn't the part of
+            # the parser that calls us would have started parsing the body.
+            i = line.find(':')
+            assert i>0, "_parse_headers fed line with no : and no leading WS"
+            lastheader = line[:i]
+            lastvalue = [line]
+        # Done with all the lines, so handle the last header.
+        if lastheader:
+            self._cur.set_raw(*self.policy.header_source_parse(lastvalue))
+
+
+class BytesFeedParser(FeedParser):
+    """Like FeedParser, but feed accepts bytes."""
+
+    def feed(self, data):
+        # surrogateescape lets arbitrary 8-bit bytes round-trip through the
+        # str-based parser (PEP 383).
+        super().feed(data.decode('ascii', 'surrogateescape'))
diff --git a/src/future/backports/email/generator.py b/src/future/backports/email/generator.py
new file mode 100644
index 00000000..53493d0a
--- /dev/null
+++ b/src/future/backports/email/generator.py
@@ -0,0 +1,498 @@
+# Copyright (C) 2001-2010 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Classes to generate plain text from a message object tree."""
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import super
+from future.builtins import str
+
+__all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator']
+
+import re
+import sys
+import time
+import random
+import warnings
+
+from io import StringIO, BytesIO
+from future.backports.email._policybase import compat32
+from future.backports.email.header import Header
+from future.backports.email.utils import _has_surrogates
+import future.backports.email.charset as _charset
+
+UNDERSCORE = '_'
+NL = '\n' # XXX: no longer used by the code below.
+
+fcre = re.compile(r'^From ', re.MULTILINE)
+
+
+class Generator(object):
+ """Generates output from a Message object tree.
+
+ This basic generator writes the message to the given file object as plain
+ text.
+ """
+ #
+ # Public interface
+ #
+
+    def __init__(self, outfp, mangle_from_=True, maxheaderlen=None, **_3to2kwargs):
+        # Keyword-only 'policy' argument emulated for Python 2 compatibility
+        # (artifact of the 3to2 conversion of the upstream Python 3 code).
+        if 'policy' in _3to2kwargs: policy = _3to2kwargs['policy']; del _3to2kwargs['policy']
+        else: policy = None
+        """Create the generator for message flattening.
+
+        outfp is the output file-like object for writing the message to. It
+        must have a write() method.
+
+        Optional mangle_from_ is a flag that, when True (the default), escapes
+        From_ lines in the body of the message by putting a `>' in front of
+        them.
+
+        Optional maxheaderlen specifies the longest length for a non-continued
+        header. When a header line is longer (in characters, with tabs
+        expanded to 8 spaces) than maxheaderlen, the header will split as
+        defined in the Header class. Set maxheaderlen to zero to disable
+        header wrapping. The default is 78, as recommended (but not required)
+        by RFC 2822.
+
+        The policy keyword specifies a policy object that controls a number of
+        aspects of the generator's operation. The default policy maintains
+        backward compatibility.
+
+        """
+        # NOTE(review): the text above says the default maxheaderlen is 78,
+        # but the code default is None, which defers the choice to the policy.
+        self._fp = outfp
+        self._mangle_from_ = mangle_from_
+        self.maxheaderlen = maxheaderlen
+        self.policy = policy
+
+    def write(self, s):
+        # Just delegate to the file object
+        self._fp.write(s)
+
+ def flatten(self, msg, unixfrom=False, linesep=None):
+ r"""Print the message object tree rooted at msg to the output file
+ specified when the Generator instance was created.
+
+ unixfrom is a flag that forces the printing of a Unix From_ delimiter
+ before the first object in the message tree. If the original message
+ has no From_ delimiter, a `standard' one is crafted. By default, this
+ is False to inhibit the printing of any From_ delimiter.
+
+ Note that for subobjects, no From_ line is printed.
+
+ linesep specifies the characters used to indicate a new line in
+ the output. The default value is determined by the policy.
+
+ """
+ # We use the _XXX constants for operating on data that comes directly
+ # from the msg, and _encoded_XXX constants for operating on data that
+ # has already been converted (to bytes in the BytesGenerator) and
+ # inserted into a temporary buffer.
+ policy = msg.policy if self.policy is None else self.policy
+ if linesep is not None:
+ policy = policy.clone(linesep=linesep)
+ if self.maxheaderlen is not None:
+ policy = policy.clone(max_line_length=self.maxheaderlen)
+ self._NL = policy.linesep
+ self._encoded_NL = self._encode(self._NL)
+ self._EMPTY = ''
+ self._encoded_EMTPY = self._encode('')
+ # Because we use clone (below) when we recursively process message
+ # subparts, and because clone uses the computed policy (not None),
+ # submessages will automatically get set to the computed policy when
+ # they are processed by this code.
+ old_gen_policy = self.policy
+ old_msg_policy = msg.policy
+ try:
+ self.policy = policy
+ msg.policy = policy
+ if unixfrom:
+ ufrom = msg.get_unixfrom()
+ if not ufrom:
+ ufrom = 'From nobody ' + time.ctime(time.time())
+ self.write(ufrom + self._NL)
+ self._write(msg)
+ finally:
+ self.policy = old_gen_policy
+ msg.policy = old_msg_policy
+
+    def clone(self, fp):
+        """Clone this generator with the exact same options."""
+        return self.__class__(fp,
+                              self._mangle_from_,
+                              None, # Use policy setting, which we've adjusted
+                              policy=self.policy)
+
+    #
+    # Protected interface - undocumented ;/
+    #
+
+    # Note that we use 'self.write' when what we are writing is coming from
+    # the source, and self._fp.write when what we are writing is coming from a
+    # buffer (because the Bytes subclass has already had a chance to transform
+    # the data in its write method in that case). This is an entirely
+    # pragmatic split determined by experiment; we could be more general by
+    # always using write and having the Bytes subclass write method detect when
+    # it has already transformed the input; but, since this whole thing is a
+    # hack anyway this seems good enough.
+
+    # Similarly, we have _XXX and _encoded_XXX attributes that are used on
+    # source and buffer data, respectively.
+    _encoded_EMPTY = ''
+
+    def _new_buffer(self):
+        # BytesGenerator overrides this to return BytesIO.
+        return StringIO()
+
+    def _encode(self, s):
+        # BytesGenerator overrides this to encode strings to bytes.
+        return s
+
+    def _write_lines(self, lines):
+        # We have to transform the line endings.
+        if not lines:
+            return
+        lines = lines.splitlines(True)
+        for line in lines[:-1]:
+            self.write(line.rstrip('\r\n'))
+            self.write(self._NL)
+        laststripped = lines[-1].rstrip('\r\n')
+        self.write(laststripped)
+        # Emit a trailing newline only if the input's last line had one.
+        if len(lines[-1]) != len(laststripped):
+            self.write(self._NL)
+
+    def _write(self, msg):
+        # We can't write the headers yet because of the following scenario:
+        # say a multipart message includes the boundary string somewhere in
+        # its body. We'd have to calculate the new boundary /before/ we write
+        # the headers so that we can write the correct Content-Type:
+        # parameter.
+        #
+        # The way we do this, so as to make the _handle_*() methods simpler,
+        # is to cache any subpart writes into a buffer. Then we write the
+        # headers and the buffer contents. That way, subpart handlers can
+        # Do The Right Thing, and can still modify the Content-Type: header if
+        # necessary.
+        oldfp = self._fp
+        try:
+            self._fp = sfp = self._new_buffer()
+            self._dispatch(msg)
+        finally:
+            self._fp = oldfp
+        # Write the headers. First we see if the message object wants to
+        # handle that itself. If not, we'll do it generically.
+        meth = getattr(msg, '_write_headers', None)
+        if meth is None:
+            self._write_headers(msg)
+        else:
+            meth(self)
+        self._fp.write(sfp.getvalue())
+
+    def _dispatch(self, msg):
+        # Get the Content-Type: for the message, then try to dispatch to
+        # self._handle_<maintype>_<subtype>(). If there's no handler for the
+        # full MIME type, then dispatch to self._handle_<maintype>(). If
+        # that's missing too, then dispatch to self._writeBody().
+        main = msg.get_content_maintype()
+        sub = msg.get_content_subtype()
+        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
+        meth = getattr(self, '_handle_' + specific, None)
+        if meth is None:
+            generic = main.replace('-', '_')
+            meth = getattr(self, '_handle_' + generic, None)
+            if meth is None:
+                meth = self._writeBody
+        meth(msg)
+
+    #
+    # Default handlers
+    #
+
+    def _write_headers(self, msg):
+        for h, v in msg.raw_items():
+            self.write(self.policy.fold(h, v))
+        # A blank line always separates headers from body
+        self.write(self._NL)
+
+    #
+    # Handlers for writing types and subtypes
+    #
+
+    def _handle_text(self, msg):
+        payload = msg.get_payload()
+        if payload is None:
+            return
+        if not isinstance(payload, str):
+            raise TypeError('string payload expected: %s' % type(payload))
+        if _has_surrogates(msg._payload):
+            charset = msg.get_param('charset')
+            if charset is not None:
+                del msg['content-transfer-encoding']
+                msg.set_payload(payload, charset)
+                payload = msg.get_payload()
+        if self._mangle_from_:
+            payload = fcre.sub('>From ', payload)
+        self._write_lines(payload)
+
+    # Default body handler
+    _writeBody = _handle_text
+
+    def _handle_multipart(self, msg):
+        # The trick here is to write out each part separately, merge them all
+        # together, and then make sure that the boundary we've chosen isn't
+        # present in the payload.
+        msgtexts = []
+        subparts = msg.get_payload()
+        if subparts is None:
+            subparts = []
+        elif isinstance(subparts, str):
+            # e.g. a non-strict parse of a message with no starting boundary.
+            self.write(subparts)
+            return
+        elif not isinstance(subparts, list):
+            # Scalar payload
+            subparts = [subparts]
+        for part in subparts:
+            s = self._new_buffer()
+            g = self.clone(s)
+            g.flatten(part, unixfrom=False, linesep=self._NL)
+            msgtexts.append(s.getvalue())
+        # BAW: What about boundaries that are wrapped in double-quotes?
+        boundary = msg.get_boundary()
+        if not boundary:
+            # Create a boundary that doesn't appear in any of the
+            # message texts.
+            alltext = self._encoded_NL.join(msgtexts)
+            boundary = self._make_boundary(alltext)
+            msg.set_boundary(boundary)
+        # If there's a preamble, write it out, with a trailing CRLF
+        if msg.preamble is not None:
+            if self._mangle_from_:
+                preamble = fcre.sub('>From ', msg.preamble)
+            else:
+                preamble = msg.preamble
+            self._write_lines(preamble)
+            self.write(self._NL)
+        # dash-boundary transport-padding CRLF
+        self.write('--' + boundary + self._NL)
+        # body-part
+        if msgtexts:
+            self._fp.write(msgtexts.pop(0))
+        # *encapsulation
+        # --> delimiter transport-padding
+        # --> CRLF body-part
+        for body_part in msgtexts:
+            # delimiter transport-padding CRLF
+            self.write(self._NL + '--' + boundary + self._NL)
+            # body-part
+            self._fp.write(body_part)
+        # close-delimiter transport-padding
+        self.write(self._NL + '--' + boundary + '--')
+        if msg.epilogue is not None:
+            self.write(self._NL)
+            if self._mangle_from_:
+                epilogue = fcre.sub('>From ', msg.epilogue)
+            else:
+                epilogue = msg.epilogue
+            self._write_lines(epilogue)
+
+    def _handle_multipart_signed(self, msg):
+        # The contents of signed parts has to stay unmodified in order to keep
+        # the signature intact per RFC1847 2.1, so we disable header wrapping.
+        # RDM: This isn't enough to completely preserve the part, but it helps.
+        p = self.policy
+        self.policy = p.clone(max_line_length=0)
+        try:
+            self._handle_multipart(msg)
+        finally:
+            self.policy = p
+
+    def _handle_message_delivery_status(self, msg):
+        # We can't just write the headers directly to self's file object
+        # because this will leave an extra newline between the last header
+        # block and the boundary. Sigh.
+        blocks = []
+        for part in msg.get_payload():
+            s = self._new_buffer()
+            g = self.clone(s)
+            g.flatten(part, unixfrom=False, linesep=self._NL)
+            text = s.getvalue()
+            lines = text.split(self._encoded_NL)
+            # Strip off the unnecessary trailing empty line
+            if lines and lines[-1] == self._encoded_EMPTY:
+                blocks.append(self._encoded_NL.join(lines[:-1]))
+            else:
+                blocks.append(text)
+        # Now join all the blocks with an empty line. This has the lovely
+        # effect of separating each block with an empty line, but not adding
+        # an extra one after the last one.
+        self._fp.write(self._encoded_NL.join(blocks))
+
+    def _handle_message(self, msg):
+        s = self._new_buffer()
+        g = self.clone(s)
+        # The payload of a message/rfc822 part should be a multipart sequence
+        # of length 1. The zeroth element of the list should be the Message
+        # object for the subpart. Extract that object, stringify it, and
+        # write it out.
+        # Except, it turns out, when it's a string instead, which happens when
+        # and only when HeaderParser is used on a message of mime type
+        # message/rfc822. Such messages are generated by, for example,
+        # Groupwise when forwarding unadorned messages. (Issue 7970.) So
+        # in that case we just emit the string body.
+        payload = msg._payload
+        if isinstance(payload, list):
+            g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
+            payload = s.getvalue()
+        else:
+            payload = self._encode(payload)
+        self._fp.write(payload)
+
+    # This used to be a module level function; we use a classmethod for this
+    # and _compile_re so we can continue to provide the module level function
+    # for backward compatibility by doing
+    #   _make_boundary = Generator._make_boundary
+    # at the end of the module. It *is* internal, so we could drop that...
+    @classmethod
+    def _make_boundary(cls, text=None):
+        # Craft a random boundary. If text is given, ensure that the chosen
+        # boundary doesn't appear in the text.
+        token = random.randrange(sys.maxsize)
+        boundary = ('=' * 15) + (_fmt % token) + '=='
+        if text is None:
+            return boundary
+        b = boundary
+        counter = 0
+        # Keep appending a counter suffix until the candidate boundary no
+        # longer occurs anywhere in the already-flattened text.
+        while True:
+            cre = cls._compile_re('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
+            if not cre.search(text):
+                break
+            b = boundary + '.' + str(counter)
+            counter += 1
+        return b
+
+    @classmethod
+    def _compile_re(cls, s, flags):
+        # BytesGenerator overrides this to compile a bytes pattern.
+        return re.compile(s, flags)
+
+class BytesGenerator(Generator):
+    """Generates a bytes version of a Message object tree.
+
+    Functionally identical to the base Generator except that the output is
+    bytes and not string. When surrogates were used in the input to encode
+    bytes, these are decoded back to bytes for output. If the policy has
+    cte_type set to 7bit, then the message is transformed such that the
+    non-ASCII bytes are properly content transfer encoded, using the charset
+    unknown-8bit.
+
+    The outfp object must accept bytes in its write method.
+    """
+
+    # Bytes versions of this constant for use in manipulating data from
+    # the BytesIO buffer.
+    _encoded_EMPTY = b''
+
+    def write(self, s):
+        # surrogateescape round-trips raw 8-bit bytes embedded in the str.
+        self._fp.write(str(s).encode('ascii', 'surrogateescape'))
+
+    def _new_buffer(self):
+        return BytesIO()
+
+    def _encode(self, s):
+        return s.encode('ascii')
+
+    def _write_headers(self, msg):
+        # This is almost the same as the string version, except for handling
+        # strings with 8bit bytes.
+        for h, v in msg.raw_items():
+            self._fp.write(self.policy.fold_binary(h, v))
+        # A blank line always separates headers from body
+        self.write(self._NL)
+
+    def _handle_text(self, msg):
+        # If the string has surrogates the original source was bytes, so
+        # just write it back out.
+        if msg._payload is None:
+            return
+        if _has_surrogates(msg._payload) and not self.policy.cte_type=='7bit':
+            if self._mangle_from_:
+                msg._payload = fcre.sub(">From ", msg._payload)
+            self._write_lines(msg._payload)
+        else:
+            super(BytesGenerator,self)._handle_text(msg)
+
+    # Default body handler
+    _writeBody = _handle_text
+
+    @classmethod
+    def _compile_re(cls, s, flags):
+        # Patterns must be bytes to match against the bytes buffer contents.
+        return re.compile(s.encode('ascii'), flags)
+
+
+_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
+
+class DecodedGenerator(Generator):
+    """Generates a text representation of a message.
+
+    Like the Generator base class, except that non-text parts are substituted
+    with a format string representing the part.
+    """
+    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
+        """Like Generator.__init__() except that an additional optional
+        argument is allowed.
+
+        Walks through all subparts of a message. If the subpart is of main
+        type `text', then it prints the decoded payload of the subpart.
+
+        Otherwise, fmt is a format string that is used instead of the message
+        payload. fmt is expanded with the following keywords (in
+        %(keyword)s format):
+
+        type : Full MIME type of the non-text part
+        maintype : Main MIME type of the non-text part
+        subtype : Sub-MIME type of the non-text part
+        filename : Filename of the non-text part
+        description: Description associated with the non-text part
+        encoding : Content transfer encoding of the non-text part
+
+        The default value for fmt is None, meaning
+
+        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
+        """
+        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
+        if fmt is None:
+            self._fmt = _FMT
+        else:
+            self._fmt = fmt
+
+    def _dispatch(self, msg):
+        for part in msg.walk():
+            maintype = part.get_content_maintype()
+            if maintype == 'text':
+                # file=self works because write() is inherited from Generator
+                # and delegates to the underlying outfp.
+                print(part.get_payload(decode=False), file=self)
+            elif maintype == 'multipart':
+                # Just skip this
+                pass
+            else:
+                print(self._fmt % {
+                    'type' : part.get_content_type(),
+                    'maintype' : part.get_content_maintype(),
+                    'subtype' : part.get_content_subtype(),
+                    'filename' : part.get_filename('[no filename]'),
+                    'description': part.get('Content-Description',
+                                            '[no description]'),
+                    'encoding' : part.get('Content-Transfer-Encoding',
+                                            '[no encoding]'),
+                    }, file=self)
+
+
+# Helpers used by Generator._make_boundary: _fmt zero-pads a random token
+# to the decimal width of sys.maxsize.
+_width = len(repr(sys.maxsize-1))
+_fmt = '%%0%dd' % _width
+
+# Backward compatibility
+_make_boundary = Generator._make_boundary
diff --git a/src/future/backports/email/header.py b/src/future/backports/email/header.py
new file mode 100644
index 00000000..63bf038c
--- /dev/null
+++ b/src/future/backports/email/header.py
@@ -0,0 +1,581 @@
+# Copyright (C) 2002-2007 Python Software Foundation
+# Author: Ben Gertzfield, Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Header encoding and decoding functionality."""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import bytes, range, str, super, zip
+
+__all__ = [
+ 'Header',
+ 'decode_header',
+ 'make_header',
+ ]
+
+import re
+import binascii
+
+from future.backports import email
+from future.backports.email import base64mime
+from future.backports.email.errors import HeaderParseError
+import future.backports.email.charset as _charset
+
+# Helpers
+from future.backports.email.quoprimime import _max_append, header_decode
+
+Charset = _charset.Charset
+
+NL = '\n'
+SPACE = ' '
+BSPACE = b' '
+SPACE8 = ' ' * 8
+EMPTYSTRING = ''
+MAXLINELEN = 78
+FWS = ' \t'
+
+USASCII = Charset('us-ascii')
+UTF8 = Charset('utf-8')
+
# Match encoded-word strings in the form =?charset?q?Hello_World?=
# (RFC 2047).  The three capture groups must be *named* -- decode_header()
# relies on ecre.split() producing (pre, charset, encoding, encoded, ...)
# runs, and the names document which group is which.  The group names had
# been stripped here, which is invalid regex syntax and would raise
# re.error at import time.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)
+
+# Field name regexp, including trailing colon, but not separating whitespace,
+# according to RFC 2822. Character range is from tilde to exclamation mark.
+# For use with .match()
+fcre = re.compile(r'[\041-\176]+:$')
+
+# Find a header embedded in a putative header value. Used to check for
+# header injection attack.
+_embeded_header = re.compile(r'\n[^ \t]+:')
+
+
def decode_header(header):
    """Decode a message header value without converting charset.

    Returns a list of (string, charset) pairs containing each of the decoded
    parts of the header. Charset is None for non-encoded parts of the header,
    otherwise a lower-case string containing the name of the character set
    specified in the encoded string.

    header may be a string that may or may not contain RFC2047 encoded words,
    or it may be a Header object.

    An email.errors.HeaderParseError may be raised when certain decoding error
    occurs (e.g. a base64 decoding exception).
    """
    # If it is a Header object, we can just return the encoded chunks.
    if hasattr(header, '_chunks'):
        return [(_charset._encode(string, str(charset)), str(charset))
                    for string, charset in header._chunks]
    # If no encoding, just return the header with no charset.
    if not ecre.search(header):
        return [(header, None)]
    # First step is to parse all the encoded parts into triplets of the form
    # (encoded_string, encoding, charset). For unencoded strings, the last
    # two parts will be None.
    words = []
    for line in header.splitlines():
        parts = ecre.split(line)
        first = True
        while parts:
            unencoded = parts.pop(0)
            if first:
                unencoded = unencoded.lstrip()
                first = False
            if unencoded:
                words.append((unencoded, None, None))
            if parts:
                charset = parts.pop(0).lower()
                encoding = parts.pop(0).lower()
                encoded = parts.pop(0)
                words.append((encoded, encoding, charset))
    # Now loop over words and remove words that consist of whitespace
    # between two encoded strings.  (Removed a stray leftover debug
    # "import sys" that served no purpose here.)
    droplist = []
    for n, w in enumerate(words):
        if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace():
            droplist.append(n-1)
    for d in reversed(droplist):
        del words[d]

    # The next step is to decode each encoded word by applying the reverse
    # base64 or quopri transformation. decoded_words is now a list of the
    # form (decoded_word, charset).
    decoded_words = []
    for encoded_string, encoding, charset in words:
        if encoding is None:
            # This is an unencoded word.
            decoded_words.append((encoded_string, charset))
        elif encoding == 'q':
            word = header_decode(encoded_string)
            decoded_words.append((word, charset))
        elif encoding == 'b':
            paderr = len(encoded_string) % 4   # Postel's law: add missing padding
            if paderr:
                encoded_string += '==='[:4 - paderr]
            try:
                word = base64mime.decode(encoded_string)
            except binascii.Error:
                raise HeaderParseError('Base64 decoding error')
            else:
                decoded_words.append((word, charset))
        else:
            raise AssertionError('Unexpected encoding: ' + encoding)
    # Now convert all words to bytes and collapse consecutive runs of
    # similarly encoded words.
    collapsed = []
    last_word = last_charset = None
    for word, charset in decoded_words:
        if isinstance(word, str):
            word = bytes(word, 'raw-unicode-escape')
        if last_word is None:
            last_word = word
            last_charset = charset
        elif charset != last_charset:
            collapsed.append((last_word, last_charset))
            last_word = word
            last_charset = charset
        elif last_charset is None:
            last_word += BSPACE + word
        else:
            last_word += word
    collapsed.append((last_word, last_charset))
    return collapsed
+
+
def make_header(decoded_seq, maxlinelen=None, header_name=None,
                continuation_ws=' '):
    """Build a Header instance from (decoded_string, charset) pairs.

    The input is a sequence as produced by decode_header(): each item is a
    (string, charset) pair, where charset is None (meaning us-ascii), the
    name of a character set, or a Charset instance.  The optional
    maxlinelen, header_name, and continuation_ws arguments have the same
    meaning as in the Header constructor.
    """
    header = Header(maxlinelen=maxlinelen, header_name=header_name,
                    continuation_ws=continuation_ws)
    for string, charset in decoded_seq:
        # A charset of None is passed straight through; Header.append()
        # treats it as us-ascii.  Names are promoted to Charset instances.
        if charset is not None and not isinstance(charset, Charset):
            charset = Charset(charset)
        header.append(string, charset)
    return header
+
+
class Header(object):
    """A MIME-compliant header value that can contain many character sets.

    Chunks of (string, charset) are accumulated via append() and rendered
    either as a unicode string (str()) or as an RFC 2047 encoded, folded
    value (encode()).
    """

    def __init__(self, s=None, charset=None,
                 maxlinelen=None, header_name=None,
                 continuation_ws=' ', errors='strict'):
        """Create a MIME-compliant header that can contain many character sets.

        Optional s is the initial header value. If None, the initial header
        value is not set. You can later append to the header with .append()
        method calls. s may be a byte string or a Unicode string, but see the
        .append() documentation for semantics.

        Optional charset serves two purposes: it has the same meaning as the
        charset argument to the .append() method. It also sets the default
        character set for all subsequent .append() calls that omit the charset
        argument. If charset is not provided in the constructor, the us-ascii
        charset is used both as s's initial charset and as the default for
        subsequent .append() calls.

        The maximum line length can be specified explicitly via maxlinelen. For
        splitting the first line to a shorter value (to account for the field
        header which isn't included in s, e.g. `Subject') pass in the name of
        the field in header_name. The default maxlinelen is 78 as recommended
        by RFC 2822.

        continuation_ws must be RFC 2822 compliant folding whitespace (usually
        either a space or a hard tab) which will be prepended to continuation
        lines.

        errors is passed through to the .append() call.
        """
        if charset is None:
            charset = USASCII
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        self._continuation_ws = continuation_ws
        self._chunks = []
        if s is not None:
            self.append(s, charset, errors)
        if maxlinelen is None:
            maxlinelen = MAXLINELEN
        self._maxlinelen = maxlinelen
        if header_name is None:
            self._headerlen = 0
        else:
            # Take the separating colon and space into account.
            self._headerlen = len(header_name) + 2

    def __str__(self):
        """Return the string value of the header."""
        self._normalize()
        uchunks = []
        lastcs = None
        lastspace = None
        for string, charset in self._chunks:
            # We must preserve spaces between encoded and non-encoded word
            # boundaries, which means for us we need to add a space when we go
            # from a charset to None/us-ascii, or from None/us-ascii to a
            # charset. Only do this for the second and subsequent chunks.
            # Don't add a space if the None/us-ascii string already has
            # a space (trailing or leading depending on transition)
            nextcs = charset
            if nextcs == _charset.UNKNOWN8BIT:
                original_bytes = string.encode('ascii', 'surrogateescape')
                string = original_bytes.decode('ascii', 'replace')
            if uchunks:
                hasspace = string and self._nonctext(string[0])
                if lastcs not in (None, 'us-ascii'):
                    if nextcs in (None, 'us-ascii') and not hasspace:
                        uchunks.append(SPACE)
                        nextcs = None
                elif nextcs not in (None, 'us-ascii') and not lastspace:
                    uchunks.append(SPACE)
            lastspace = string and self._nonctext(string[-1])
            lastcs = nextcs
            uchunks.append(string)
        return EMPTYSTRING.join(uchunks)

    # Rich comparison operators for equality only. BAW: does it make sense to
    # have or explicitly disable <, <=, >, >= operators?
    def __eq__(self, other):
        # other may be a Header or a string. Both are fine so coerce
        # ourselves to a unicode (of the unencoded header value), swap the
        # args and do another comparison.
        return other == str(self)

    def __ne__(self, other):
        return not self == other

    def append(self, s, charset=None, errors='strict'):
        """Append a string to the MIME header.

        Optional charset, if given, should be a Charset instance or the name
        of a character set (which will be converted to a Charset instance). A
        value of None (the default) means that the charset given in the
        constructor is used.

        s may be a byte string or a Unicode string. If it is a byte string
        (i.e. isinstance(s, str) is false), then charset is the encoding of
        that byte string, and a UnicodeError will be raised if the string
        cannot be decoded with that charset. If s is a Unicode string, then
        charset is a hint specifying the character set of the characters in
        the string. In either case, when producing an RFC 2822 compliant
        header using RFC 2047 rules, the string will be encoded using the
        output codec of the charset. If the string cannot be encoded to the
        output codec, a UnicodeError will be raised.

        Optional `errors' is passed as the errors argument to the decode
        call if s is a byte string.
        """
        if charset is None:
            charset = self._charset
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        if not isinstance(s, str):
            input_charset = charset.input_codec or 'us-ascii'
            if input_charset == _charset.UNKNOWN8BIT:
                s = s.decode('us-ascii', 'surrogateescape')
            else:
                s = s.decode(input_charset, errors)
        # Ensure that the bytes we're storing can be decoded to the output
        # character set, otherwise an early error is raised.
        output_charset = charset.output_codec or 'us-ascii'
        if output_charset != _charset.UNKNOWN8BIT:
            try:
                s.encode(output_charset, errors)
            except UnicodeEncodeError:
                # Only a failed us-ascii target falls back to UTF-8; any
                # other charset mismatch is a genuine error for the caller.
                if output_charset != 'us-ascii':
                    raise
                charset = UTF8
        self._chunks.append((s, charset))

    def _nonctext(self, s):
        """True if string s is not a ctext character of RFC822.
        """
        # Used to decide whether a separating SPACE must be inserted at
        # chunk boundaries in __str__()/encode().
        return s.isspace() or s in ('(', ')', '\\')

    def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
        r"""Encode a message header into an RFC-compliant format.

        There are many issues involved in converting a given string for use in
        an email header. Only certain character sets are readable in most
        email clients, and as header strings can only contain a subset of
        7-bit ASCII, care must be taken to properly convert and encode (with
        Base64 or quoted-printable) header strings. In addition, there is a
        75-character length limit on any given encoded header field, so
        line-wrapping must be performed, even with double-byte character sets.

        Optional maxlinelen specifies the maximum length of each generated
        line, exclusive of the linesep string. Individual lines may be longer
        than maxlinelen if a folding point cannot be found. The first line
        will be shorter by the length of the header name plus ": " if a header
        name was specified at Header construction time. The default value for
        maxlinelen is determined at header construction time.

        Optional splitchars is a string containing characters which should be
        given extra weight by the splitting algorithm during normal header
        wrapping. This is in very rough support of RFC 2822's `higher level
        syntactic breaks': split points preceded by a splitchar are preferred
        during line splitting, with the characters preferred in the order in
        which they appear in the string. Space and tab may be included in the
        string to indicate whether preference should be given to one over the
        other as a split point when other split chars do not appear in the line
        being split. Splitchars does not affect RFC 2047 encoded lines.

        Optional linesep is a string to be used to separate the lines of
        the value. The default value is the most useful for typical
        Python applications, but it can be set to \r\n to produce RFC-compliant
        line separators when needed.
        """
        self._normalize()
        if maxlinelen is None:
            maxlinelen = self._maxlinelen
        # A maxlinelen of 0 means don't wrap. For all practical purposes,
        # choosing a huge number here accomplishes that and makes the
        # _ValueFormatter algorithm much simpler.
        if maxlinelen == 0:
            maxlinelen = 1000000
        formatter = _ValueFormatter(self._headerlen, maxlinelen,
                                    self._continuation_ws, splitchars)
        lastcs = None
        hasspace = lastspace = None
        # (Removed a stray leftover debug "import sys" from this loop.)
        for string, charset in self._chunks:
            if hasspace is not None:
                hasspace = string and self._nonctext(string[0])
                if lastcs not in (None, 'us-ascii'):
                    if not hasspace or charset not in (None, 'us-ascii'):
                        formatter.add_transition()
                elif charset not in (None, 'us-ascii') and not lastspace:
                    formatter.add_transition()
            lastspace = string and self._nonctext(string[-1])
            lastcs = charset
            hasspace = False
            lines = string.splitlines()
            if lines:
                formatter.feed('', lines[0], charset)
            else:
                formatter.feed('', '', charset)
            for line in lines[1:]:
                formatter.newline()
                if charset.header_encoding is not None:
                    formatter.feed(self._continuation_ws, ' ' + line.lstrip(),
                                   charset)
                else:
                    sline = line.lstrip()
                    fws = line[:len(line)-len(sline)]
                    formatter.feed(fws, sline, charset)
            if len(lines) > 1:
                formatter.newline()
        if self._chunks:
            formatter.add_transition()
        value = formatter._str(linesep)
        # Refuse to emit a value that smuggles in a second header line
        # (header injection).
        if _embeded_header.search(value):
            raise HeaderParseError("header value appears to contain "
                "an embedded header: {!r}".format(value))
        return value

    def _normalize(self):
        # Step 1: Normalize the chunks so that all runs of identical charsets
        # get collapsed into a single unicode string.
        chunks = []
        last_charset = None
        last_chunk = []
        for string, charset in self._chunks:
            if charset == last_charset:
                last_chunk.append(string)
            else:
                if last_charset is not None:
                    chunks.append((SPACE.join(last_chunk), last_charset))
                last_chunk = [string]
                last_charset = charset
        if last_chunk:
            chunks.append((SPACE.join(last_chunk), last_charset))
        self._chunks = chunks
+
+
class _ValueFormatter(object):
    """Accumulates header chunks and folds them into RFC 2822 lines.

    Used by Header.encode(); not part of the public API.
    """

    def __init__(self, headerlen, maxlen, continuation_ws, splitchars):
        self._maxlen = maxlen
        self._continuation_ws = continuation_ws
        self._continuation_ws_len = len(continuation_ws)
        self._splitchars = splitchars
        self._lines = []
        # Line currently being built; headerlen reserves room for the
        # "Name: " prefix that will precede the first line.
        self._current_line = _Accumulator(headerlen)

    def _str(self, linesep):
        # Flush the in-progress line, then join all completed lines.
        self.newline()
        return linesep.join(self._lines)

    def __str__(self):
        return self._str(NL)

    def newline(self):
        # Drop a trailing bare transition marker (' ', '') from the line;
        # any other popped pair is pushed right back.
        end_of_line = self._current_line.pop()
        if end_of_line != (' ', ''):
            self._current_line.push(*end_of_line)
        if len(self._current_line) > 0:
            if self._current_line.is_onlyws():
                # Pure whitespace extends the previous line rather than
                # starting a new (invalidly blank) continuation line.
                self._lines[-1] += str(self._current_line)
            else:
                self._lines.append(str(self._current_line))
        self._current_line.reset()

    def add_transition(self):
        # Marks a charset boundary; may later be folded away by newline().
        self._current_line.push(' ', '')

    def feed(self, fws, string, charset):
        # If the charset has no header encoding (i.e. it is an ASCII encoding)
        # then we must split the header at the "highest level syntactic break"
        # possible. Note that we don't have a lot of smarts about field
        # syntax; we just try to break on semi-colons, then commas, then
        # whitespace. Eventually, this should be pluggable.
        if charset.header_encoding is None:
            self._ascii_split(fws, string, self._splitchars)
            return
        # Otherwise, we're doing either a Base64 or a quoted-printable
        # encoding which means we don't need to split the line on syntactic
        # breaks. We can basically just find enough characters to fit on the
        # current line, minus the RFC 2047 chrome. What makes this trickier
        # though is that we have to split at octet boundaries, not character
        # boundaries but it's only safe to split at character boundaries so at
        # best we can only get close.
        encoded_lines = charset.header_encode_lines(string, self._maxlengths())
        # The first element extends the current line, but if it's None then
        # nothing more fit on the current line so start a new line.
        try:
            first_line = encoded_lines.pop(0)
        except IndexError:
            # There are no encoded lines, so we're done.
            return
        if first_line is not None:
            self._append_chunk(fws, first_line)
        try:
            last_line = encoded_lines.pop()
        except IndexError:
            # There was only one line.
            return
        self.newline()
        self._current_line.push(self._continuation_ws, last_line)
        # Everything else are full lines in themselves.
        for line in encoded_lines:
            self._lines.append(self._continuation_ws + line)

    def _maxlengths(self):
        # Infinite generator of per-line length budgets for
        # charset.header_encode_lines().
        # The first line's length.
        yield self._maxlen - len(self._current_line)
        while True:
            yield self._maxlen - self._continuation_ws_len

    def _ascii_split(self, fws, string, splitchars):
        # The RFC 2822 header folding algorithm is simple in principle but
        # complex in practice. Lines may be folded any place where "folding
        # white space" appears by inserting a linesep character in front of the
        # FWS. The complication is that not all spaces or tabs qualify as FWS,
        # and we are also supposed to prefer to break at "higher level
        # syntactic breaks". We can't do either of these without intimate
        # knowledge of the structure of structured headers, which we don't have
        # here. So the best we can do here is prefer to break at the specified
        # splitchars, and hope that we don't choose any spaces or tabs that
        # aren't legal FWS. (This is at least better than the old algorithm,
        # where we would sometimes *introduce* FWS after a splitchar, or the
        # algorithm before that, where we would turn all white space runs into
        # single spaces or tabs.)
        parts = re.split("(["+FWS+"]+)", fws+string)
        if parts[0]:
            # String starts with actual text: give it an empty FWS slot so
            # the pairwise iteration below stays aligned.
            parts[:0] = ['']
        else:
            parts.pop(0)
        # Iterate (fws, part) pairs: zip of two references to one iterator.
        for fws, part in zip(*[iter(parts)]*2):
            self._append_chunk(fws, part)

    def _append_chunk(self, fws, string):
        self._current_line.push(fws, string)
        if len(self._current_line) > self._maxlen:
            # Find the best split point, working backward from the end.
            # There might be none, on a long first line.
            for ch in self._splitchars:
                for i in range(self._current_line.part_count()-1, 0, -1):
                    if ch.isspace():
                        # Whitespace splitchar: break where the FWS run
                        # itself starts with ch.
                        fws = self._current_line[i][0]
                        if fws and fws[0]==ch:
                            break
                    # Non-whitespace splitchar: break after a part that
                    # ends with ch (e.g. ';' or ',').
                    prevpart = self._current_line[i-1][1]
                    if prevpart and prevpart[-1]==ch:
                        break
                else:
                    # No break point for this splitchar; try the next one.
                    continue
                break
            else:
                # No split point found with any splitchar.
                fws, part = self._current_line.pop()
                if self._current_line._initial_size > 0:
                    # There will be a header, so leave it on a line by itself.
                    self.newline()
                    if not fws:
                        # We don't use continuation_ws here because the whitespace
                        # after a header should always be a space.
                        fws = ' '
                self._current_line.push(fws, part)
                return
            # Split found at index i: carry the tail over to a fresh line.
            remainder = self._current_line.pop_from(i)
            self._lines.append(str(self._current_line))
            self._current_line.reset(remainder)
+
+
class _Accumulator(list):
    """A stack of (fws, part) string pairs with a virtual initial length.

    Used by _ValueFormatter to build one header line at a time.  The
    initial_size accounts for text (e.g. the header name and ": ") that is
    not stored in the accumulator but still occupies room on the line.
    """

    def __init__(self, initial_size=0):
        self._initial_size = initial_size
        super().__init__()

    def push(self, fws, string):
        self.append((fws, string))

    def pop_from(self, i=0):
        # Detach and return everything from index i onward.
        tail = self[i:]
        del self[i:]
        return tail

    def pop(self):
        # Unlike list.pop(), popping an empty accumulator yields a blank pair.
        return super().pop() if self.part_count() else ('', '')

    def __len__(self):
        # Logical line length: stored text plus the reserved prefix room.
        total = self._initial_size
        for fws, part in self:
            total += len(fws) + len(part)
        return total

    def __str__(self):
        return ''.join(fws + part for fws, part in self)

    def reset(self, startval=None):
        self[:] = [] if startval is None else startval
        self._initial_size = 0

    def is_onlyws(self):
        # True when there is no reserved prefix and the content is empty or
        # pure whitespace.
        return self._initial_size == 0 and (not self or str(self).isspace())

    def part_count(self):
        # Number of stored pairs (as opposed to __len__'s character count).
        return super().__len__()
diff --git a/src/future/backports/email/headerregistry.py b/src/future/backports/email/headerregistry.py
new file mode 100644
index 00000000..9aaad65a
--- /dev/null
+++ b/src/future/backports/email/headerregistry.py
@@ -0,0 +1,592 @@
+"""Representing and manipulating email headers via custom objects.
+
+This module provides an implementation of the HeaderRegistry API.
+The implementation is designed to flexibly follow RFC5322 rules.
+
+Eventually HeaderRegistry will be a public API, but it isn't yet,
+and will probably change some before that happens.
+
+"""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
+from future.builtins import super
+from future.builtins import str
+from future.utils import text_to_native_str
+from future.backports.email import utils
+from future.backports.email import errors
+from future.backports.email import _header_value_parser as parser
+
class Address(object):

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address can have a 'display_name', a 'username', and a 'domain'. In
        addition to specifying the username and domain separately, they may be
        specified together by using the addr_spec keyword *instead of* the
        username and domain keywords. If an addr_spec string is specified it
        must be properly quoted according to RFC 5322 rules; an error will be
        raised if it is not.

        An Address object has display_name, username, domain, and addr_spec
        attributes, all of which are read-only. The addr_spec and the string
        value of the object are both quoted according to RFC5322 rules, but
        without any Content Transfer Encoding.

        """
        # This clause with its potential 'raise' may only happen when an
        # application program creates an Address object using an addr_spec
        # keyword. The email library code itself must always supply username
        # and domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            a_s, rest = parser.get_addr_spec(addr_spec)
            if rest:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    a_s, addr_spec))
            # Surface the first parsing defect as a hard error.
            if a_s.all_defects:
                raise a_s.all_defects[0]
            username = a_s.local_part
            domain = a_s.domain
        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        return self._display_name

    @property
    def username(self):
        return self._username

    @property
    def domain(self):
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.
        """
        # Quote the local part only if it contains characters outside the
        # dot-atom set.
        nameset = set(self.username)
        if len(nameset) > len(nameset-parser.DOT_ATOM_ENDS):
            lp = parser.quote_string(self.username)
        else:
            lp = self.username
        if self.domain:
            return lp + '@' + self.domain
        if not lp:
            # Null address (e.g. SMTP null return path).
            return '<>'
        return lp

    def __repr__(self):
        return "Address(display_name={!r}, username={!r}, domain={!r})".format(
            self.display_name, self.username, self.domain)

    def __str__(self):
        nameset = set(self.display_name)
        if len(nameset) > len(nameset-parser.SPECIALS):
            disp = parser.quote_string(self.display_name)
        else:
            disp = self.display_name
        if disp:
            addr_spec = '' if self.addr_spec=='<>' else self.addr_spec
            return "{} <{}>".format(disp, addr_spec)
        return self.addr_spec

    # NOTE(review): defining __eq__ without __hash__ makes Address
    # unhashable under Python 3 semantics -- confirm this is intended.
    def __eq__(self, other):
        if type(other) != type(self):
            return False
        return (self.display_name == other.display_name and
                self.username == other.username and
                self.domain == other.domain)
+
+
class Group(object):

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by colon and a
        list of addresses (see Address) terminated by a semi-colon. The Group
        is created by specifying a display_name and a possibly empty list of
        Address objects. A Group can also be used to represent a single
        address that is not in a group, which is convenient when manipulating
        lists that are a combination of Groups and individual Addresses. In
        this case the display_name should be set to None. In particular, the
        string representation of a Group whose display_name is None is the same
        as the Address object, if there is one and only one Address object in
        the addresses list.

        """
        self._display_name = display_name
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        return self._display_name

    @property
    def addresses(self):
        return self._addresses

    def __repr__(self):
        # Bug fix: the format string was missing its closing parenthesis,
        # producing e.g. "Group(display_name='x', addresses=()".
        return "Group(display_name={!r}, addresses={!r})".format(
            self.display_name, self.addresses)

    def __str__(self):
        # A degenerate group (display_name None, exactly one address)
        # renders as the bare address.
        if self.display_name is None and len(self.addresses)==1:
            return str(self.addresses[0])
        disp = self.display_name
        if disp is not None:
            nameset = set(disp)
            if len(nameset) > len(nameset-parser.SPECIALS):
                disp = parser.quote_string(disp)
        adrstr = ", ".join(str(x) for x in self.addresses)
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    # NOTE(review): __eq__ without __hash__ makes Group unhashable under
    # Python 3 semantics -- confirm this is intended.
    def __eq__(self, other):
        if type(other) != type(self):
            return False
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)
+
+
+# Header Classes #
+
class BaseHeader(str):

    """Base class for message headers.

    Implements generic behavior and provides tools for subclasses.

    A subclass must define a classmethod named 'parse' that takes an unfolded
    value string and a dictionary as its arguments. The dictionary will
    contain one key, 'defects', initialized to an empty list. After the call
    the dictionary must contain two additional keys: parse_tree, set to the
    parse tree obtained from parsing the header, and 'decoded', set to the
    string value of the idealized representation of the data from the value.
    (That is, encoded words are decoded, and values that have canonical
    representations are so represented.)

    The defects key is intended to collect parsing defects, which the message
    parser will subsequently dispose of as appropriate. The parser should not,
    insofar as practical, raise any errors. Defects should be added to the
    list instead. The standard header parsers register defects for RFC
    compliance issues, for obsolete RFC syntax, and for unrecoverable parsing
    errors.

    The parse method may add additional keys to the dictionary. In this case
    the subclass must define an 'init' method, which will be passed the
    dictionary as its keyword arguments. The method should use (usually by
    setting them as the value of similarly named attributes) and remove all the
    extra keys added by its parse method, and then use super to call its parent
    class with the remaining arguments and keywords.

    The subclass should also make sure that a 'max_count' attribute is defined
    that is either None or 1. XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        # Delegate parsing to the subclass, then build the str instance from
        # the 'decoded' representation it produced.
        kwds = {'defects': []}
        cls.parse(value, kwds)
        if utils._has_surrogates(kwds['decoded']):
            kwds['decoded'] = utils._sanitize(kwds['decoded'])
        self = str.__new__(cls, kwds['decoded'])
        # del kwds['decoded']
        self.init(name, **kwds)
        return self

    def init(self, name, **_3to2kwargs):
        # The _3to2kwargs dance emulates Python 3 keyword-only arguments
        # (defects, parse_tree) in 3to2-converted code.
        defects = _3to2kwargs['defects']; del _3to2kwargs['defects']
        parse_tree = _3to2kwargs['parse_tree']; del _3to2kwargs['parse_tree']
        self._name = name
        self._parse_tree = parse_tree
        self._defects = defects

    @property
    def name(self):
        return self._name

    @property
    def defects(self):
        # Expose an immutable copy of the collected parsing defects.
        return tuple(self._defects)

    def __reduce__(self):
        # Pickle support: rebuild via _reconstruct_header so unpickling does
        # not re-run the parse in __new__.
        return (
            _reconstruct_header,
            (
                self.__class__.__name__,
                self.__class__.__bases__,
                str(self),
            ),
            self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        # Bypass __new__'s parsing; used only by the pickle machinery.
        return str.__new__(cls, value)

    def fold(self, **_3to2kwargs):
        policy = _3to2kwargs['policy']; del _3to2kwargs['policy']
        """Fold header according to policy.

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy. If the parse tree
        contains surrogateescaped bytes, the bytes are CTE encoded using
        the charset 'unknown-8bit".

        Any non-ASCII characters in the parse tree are CTE encoded using
        charset utf-8. XXX: make this a policy setting.

        The returned value is an ASCII-only string possibly containing linesep
        characters, and ending with a linesep character. The string includes
        the header name and the ': ' separator.

        """
        # At some point we need to only put fws here if it was in the source.
        header = parser.Header([
            parser.HeaderLabel([
                parser.ValueTerminal(self.name, 'header-name'),
                parser.ValueTerminal(':', 'header-sep')]),
            parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]),
            self._parse_tree])
        return header.fold(policy=policy)
+
+
def _reconstruct_header(cls_name, bases, value):
    # Pickle helper for BaseHeader.__reduce__: recreate the header class by
    # name (as a native str for Python 2 compatibility) and rebuild the
    # instance from its string value via _reconstruct, skipping re-parsing.
    return type(text_to_native_str(cls_name), bases, {})._reconstruct(value)
+
+
class UnstructuredHeader(object):
    """Mix-in providing parsing for unstructured (free-text) headers."""

    # Any number of these headers may appear in a message.
    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        # The decoded form is simply the string value of the parse tree.
        parse_tree = cls.value_parser(value)
        kwds['parse_tree'] = parse_tree
        kwds['decoded'] = str(parse_tree)
+
+
class UniqueUnstructuredHeader(UnstructuredHeader):

    # At most one instance of this header may appear in a message.
    max_count = 1
+
+
class DateHeader(object):

    """Header whose value consists of a single timestamp.

    Provides an additional attribute, datetime, which is either an aware
    datetime using a timezone, or a naive datetime if the timezone
    in the input string is -0000. Also accepts a datetime as input.
    The 'value' attribute is the normalized form of the timestamp,
    which means it is the output of format_datetime on the datetime.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        # An empty value is recorded as a defect rather than raising, and
        # neutral placeholders are stored so the header can still be built.
        if not value:
            kwds['defects'].append(errors.HeaderMissingRequiredValue())
            kwds['datetime'] = None
            kwds['decoded'] = ''
            kwds['parse_tree'] = parser.TokenList()
            return
        if isinstance(value, str):
            # String input is parsed into a datetime first; datetime input
            # is used as-is.
            value = utils.parsedate_to_datetime(value)
        kwds['datetime'] = value
        # 'decoded' is the normalized (format_datetime) form of the timestamp.
        kwds['decoded'] = utils.format_datetime(kwds['datetime'])
        kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        # Pull out the extra key added by parse() before delegating upward.
        self._datetime = kw.pop('datetime')
        super().init(*args, **kw)

    @property
    def datetime(self):
        return self._datetime
+
+
class UniqueDateHeader(DateHeader):

    # At most one instance of this header may appear in a message.
    max_count = 1
+
+
class AddressHeader(object):

    max_count = None

    @staticmethod
    def value_parser(value):
        address_list, value = parser.get_address_list(value)
        # get_address_list is expected to consume the whole value.
        assert not value, 'this should not happen'
        return address_list

    @classmethod
    def parse(cls, value, kwds):
        if isinstance(value, str):
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            kwds['parse_tree'] = address_list = cls.value_parser(value)
            groups = []
            for addr in address_list.addresses:
                # Each RFC 'address' becomes a Group; missing parts default
                # to empty strings.
                groups.append(Group(addr.display_name,
                                    [Address(mb.display_name or '',
                                             mb.local_part or '',
                                             mb.domain or '')
                                     for mb in addr.all_mailboxes]))
            defects = list(address_list.all_defects)
        else:
            # Assume it is Address/Group stuff
            if not hasattr(value, '__iter__'):
                value = [value]
            # Wrap bare Address-like objects (no .addresses) in a
            # display-name-less Group so the stored form is uniform.
            groups = [Group(None, [item]) if not hasattr(item, 'addresses')
                      else item
                      for item in value]
            defects = []
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join([str(item) for item in groups])
        if 'parse_tree' not in kwds:
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        self._groups = tuple(kw.pop('groups'))
        # Lazily-computed flattened view; see the addresses property.
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        return self._groups

    @property
    def addresses(self):
        # Flatten the groups into a tuple of Address objects on first access.
        if self._addresses is None:
            self._addresses = tuple([address for group in self._groups
                                     for address in group.addresses])
        return self._addresses
+
+
class UniqueAddressHeader(AddressHeader):
    """Address header that may appear at most once (e.g. To, From)."""

    max_count = 1
+
+
class SingleAddressHeader(AddressHeader):
    """Address header whose value must be exactly one address."""

    @property
    def address(self):
        """Return the single address, raising ValueError on any other count."""
        addresses = self.addresses
        if len(addresses) != 1:
            raise ValueError(("value of single address header {} is not "
                "a single address").format(self.name))
        return addresses[0]
+
+
class UniqueSingleAddressHeader(SingleAddressHeader):
    """Single-address header that may appear at most once (e.g. Sender)."""

    max_count = 1
+
+
class MIMEVersionHeader(object):
    """MIME-Version header; exposes major, minor and version attributes.

    All three attributes are None when the value could not be parsed as
    a version number.
    """

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)
        # A missing minor number means the value is malformed, so major
        # is reported as None as well.
        kwds['major'] = None if parse_tree.minor is None else parse_tree.major
        kwds['minor'] = parse_tree.minor
        if parse_tree.minor is not None:
            kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor'])
        else:
            kwds['version'] = None

    def init(self, *args, **kw):
        # Pop our keywords before the base class consumes the rest.
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        return self._major

    @property
    def minor(self):
        return self._minor

    @property
    def version(self):
        return self._version
+
+
class ParameterizedMIMEHeader(object):

    # Mixin that handles the params dict.  Must be subclassed and
    # a property value_parser for the specific header provided.

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)
        if parse_tree.params is None:
            kwds['params'] = {}
        else:
            # The MIME RFCs specify that parameter ordering is arbitrary.
            # Names are lower-cased; both names and values are sanitized
            # (surrogates removed).
            kwds['params'] = dict((utils._sanitize(name).lower(),
                                   utils._sanitize(value))
                                  for name, value in parse_tree.params)

    def init(self, *args, **kw):
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        # Return a copy so callers cannot mutate the header's state.
        return self._params.copy()
+
+
class ContentTypeHeader(ParameterizedMIMEHeader):
    """Content-Type header: exposes maintype, subtype and content_type."""

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        tree = self._parse_tree
        # Sanitize to strip any surrogate-escaped bytes from the tokens.
        self._maintype = utils._sanitize(tree.maintype)
        self._subtype = utils._sanitize(tree.subtype)

    @property
    def maintype(self):
        """The main type, e.g. 'text' in 'text/plain'."""
        return self._maintype

    @property
    def subtype(self):
        """The subtype, e.g. 'plain' in 'text/plain'."""
        return self._subtype

    @property
    def content_type(self):
        """The full 'maintype/subtype' string."""
        return '/'.join((self.maintype, self.subtype))
+
+
class ContentDispositionHeader(ParameterizedMIMEHeader):
    """Content-Disposition header; exposes the disposition token."""

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        cd = self._parse_tree.content_disposition
        # None means the disposition token was absent/unparseable; keep it
        # as None rather than sanitizing.
        self._content_disposition = cd if cd is None else utils._sanitize(cd)

    @property
    def content_disposition(self):
        return self._content_disposition
+
+
class ContentTransferEncodingHeader(object):
    """Content-Transfer-Encoding header; exposes the sanitized cte token."""

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        # The parse tree's cte token with surrogates removed.
        self._cte = utils._sanitize(self._parse_tree.cte)

    @property
    def cte(self):
        return self._cte
+
+
+# The header factory #
+
# Maps lower-cased header names to the specialized class that knows how
# to parse and represent them.  Names not listed here fall back to the
# registry's default_class (UnstructuredHeader).
_default_header_map = {
    'subject': UniqueUnstructuredHeader,
    'date': UniqueDateHeader,
    'resent-date': DateHeader,
    'orig-date': UniqueDateHeader,
    'sender': UniqueSingleAddressHeader,
    'resent-sender': SingleAddressHeader,
    'to': UniqueAddressHeader,
    'resent-to': AddressHeader,
    'cc': UniqueAddressHeader,
    'resent-cc': AddressHeader,
    'bcc': UniqueAddressHeader,
    'resent-bcc': AddressHeader,
    'from': UniqueAddressHeader,
    'resent-from': AddressHeader,
    'reply-to': UniqueAddressHeader,
    'mime-version': MIMEVersionHeader,
    'content-type': ContentTypeHeader,
    'content-disposition': ContentDispositionHeader,
    'content-transfer-encoding': ContentTransferEncodingHeader,
    }
+
class HeaderRegistry(object):

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                 use_default_map=True):
        """Create a header_factory that works with the Policy API.

        base_class is the class that will be the last class in the created
        header class's __bases__ list.  default_class is the class that will be
        used if "name" (see __call__) does not appear in the registry.
        use_default_map controls whether or not the default mapping of names to
        specialized classes is copied in to the registry when the factory is
        created.  The default is True.

        """
        self.registry = {}
        self.base_class = base_class
        self.default_class = default_class
        if use_default_map:
            self.registry.update(_default_header_map)

    def map_to_type(self, name, cls):
        """Register cls as the specialized class for handling "name" headers.

        """
        self.registry[name.lower()] = cls

    def __getitem__(self, name):
        # Synthesize a concrete header class on the fly by combining the
        # specialized behavior class with the factory's base class.
        # text_to_native_str is required because type() demands a native
        # str class name on Python 2.
        cls = self.registry.get(name.lower(), self.default_class)
        return type(text_to_native_str('_'+cls.__name__), (cls, self.base_class), {})

    def __call__(self, name, value):
        """Create a header instance for header 'name' from 'value'.

        Creates a header instance by creating a specialized class for parsing
        and representing the specified header by combining the factory
        base_class with a specialized class from the registry or the
        default_class, and passing the name and value to the constructed
        class's constructor.

        """
        return self[name](name, value)
diff --git a/src/future/backports/email/iterators.py b/src/future/backports/email/iterators.py
new file mode 100644
index 00000000..82d320f8
--- /dev/null
+++ b/src/future/backports/email/iterators.py
@@ -0,0 +1,74 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Various types of useful iterators and generators."""
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
+__all__ = [
+ 'body_line_iterator',
+ 'typed_subpart_iterator',
+ 'walk',
+ # Do not include _structure() since it's part of the debugging API.
+ ]
+
+import sys
+from io import StringIO
+
+
+# This function will become a method of the Message class
def walk(self):
    """Walk over the message tree, yielding each subpart.

    The walk is performed in depth-first order.  This method is a
    generator.
    """
    yield self
    if not self.is_multipart():
        return
    for part in self.get_payload():
        for descendant in part.walk():
            yield descendant
+
+
+# These two functions are imported into the Iterators.py interface module.
def body_line_iterator(msg, decode=False):
    """Iterate over the parts, returning string payloads line-by-line.

    Optional decode (default False) is passed through to .get_payload().
    """
    for part in msg.walk():
        body = part.get_payload(decode=decode)
        # Multipart containers return a list (or None when decoding);
        # only string payloads contribute lines.
        if not isinstance(body, str):
            continue
        for line in StringIO(body):
            yield line
+
+
def typed_subpart_iterator(msg, maintype='text', subtype=None):
    """Iterate over the subparts with a given MIME type.

    Use `maintype' as the main MIME type to match against; this defaults to
    "text".  Optional `subtype' is the MIME subtype to match against; if
    omitted, only the main type is matched.
    """
    for part in msg.walk():
        if part.get_content_maintype() != maintype:
            continue
        if subtype is None or part.get_content_subtype() == subtype:
            yield part
+
+
+def _structure(msg, fp=None, level=0, include_default=False):
+ """A handy debugging aid"""
+ if fp is None:
+ fp = sys.stdout
+ tab = ' ' * (level * 4)
+ print(tab + msg.get_content_type(), end='', file=fp)
+ if include_default:
+ print(' [%s]' % msg.get_default_type(), file=fp)
+ else:
+ print(file=fp)
+ if msg.is_multipart():
+ for subpart in msg.get_payload():
+ _structure(subpart, fp, level+1, include_default)
diff --git a/src/future/backports/email/message.py b/src/future/backports/email/message.py
new file mode 100644
index 00000000..d8d9615d
--- /dev/null
+++ b/src/future/backports/email/message.py
@@ -0,0 +1,882 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Basic message object for the email package object model."""
+from __future__ import absolute_import, division, unicode_literals
+from future.builtins import list, range, str, zip
+
+__all__ = ['Message']
+
+import re
+import uu
+import base64
+import binascii
+from io import BytesIO, StringIO
+
+# Intrapackage imports
+from future.utils import as_native_str
+from future.backports.email import utils
+from future.backports.email import errors
+from future.backports.email._policybase import compat32
+from future.backports.email import charset as _charset
+from future.backports.email._encoded_words import decode_b
+Charset = _charset.Charset
+
+SEMISPACE = '; '
+
+# Regular expression that matches `special' characters in parameters, the
+# existence of which force quoting of the parameter value.
+tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
+
+
+def _splitparam(param):
+ # Split header parameters. BAW: this may be too simple. It isn't
+ # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
+ # found in the wild. We may eventually need a full fledged parser.
+ # RDM: we might have a Header here; for now just stringify it.
+ a, sep, b = str(param).partition(';')
+ if not sep:
+ return a.strip(), None
+ return a.strip(), b.strip()
+
def _formatparam(param, value=None, quote=True):
    """Convenience function to format and return a key=value pair.

    This will quote the value if needed or if quote is true.  If value is a
    three tuple (charset, language, value), it will be encoded according
    to RFC2231 rules.  If it contains non-ascii characters it will likewise
    be encoded according to RFC2231 rules, using the utf-8 charset and
    a null language.
    """
    if value is not None and len(value) > 0:
        # A tuple is used for RFC 2231 encoded parameter values where items
        # are (charset, language, value).  charset is a string, not a Charset
        # instance.  RFC 2231 encoded values are never quoted, per RFC.
        if isinstance(value, tuple):
            # Encode as per RFC 2231
            param += '*'
            value = utils.encode_rfc2231(value[2], value[0], value[1])
            return '%s=%s' % (param, value)
        else:
            try:
                # Pure-ASCII values can be emitted directly (quoted below
                # if needed); non-ASCII forces RFC 2231 encoding.
                value.encode('ascii')
            except UnicodeEncodeError:
                param += '*'
                value = utils.encode_rfc2231(value, 'utf-8', '')
                return '%s=%s' % (param, value)
        # BAW: Please check this.  I think that if quote is set it should
        # force quoting even if not necessary.
        if quote or tspecials.search(value):
            return '%s="%s"' % (param, utils.quote(value))
        else:
            return '%s=%s' % (param, value)
    else:
        # No value: emit the bare parameter name.
        return param
+
+def _parseparam(s):
+ # RDM This might be a Header, so for now stringify it.
+ s = ';' + str(s)
+ plist = []
+ while s[:1] == ';':
+ s = s[1:]
+ end = s.find(';')
+ while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
+ end = s.find(';', end + 1)
+ if end < 0:
+ end = len(s)
+ f = s[:end]
+ if '=' in f:
+ i = f.index('=')
+ f = f[:i].strip().lower() + '=' + f[i+1:].strip()
+ plist.append(f.strip())
+ s = s[end:]
+ return plist
+
+
def _unquotevalue(value):
    # This is different than utils.collapse_rfc2231_value() because it doesn't
    # try to convert the value to a unicode.  Message.get_param() and
    # Message.get_params() are both currently defined to return the tuple in
    # the face of RFC 2231 parameters.
    if not isinstance(value, tuple):
        return utils.unquote(value)
    return value[0], value[1], utils.unquote(value[2])
+
+
+class Message(object):
+ """Basic message object.
+
+ A message object is defined as something that has a bunch of RFC 2822
+ headers and a payload. It may optionally have an envelope header
+ (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
+ multipart or a message/rfc822), then the payload is a list of Message
+ objects, otherwise it is a string.
+
+ Message objects implement part of the `mapping' interface, which assumes
+ there is exactly one occurrence of the header per message. Some headers
+ do in fact appear multiple times (e.g. Received) and for those headers,
+ you must use the explicit API to set or get all the headers. Not all of
+ the mapping methods are implemented.
+ """
    def __init__(self, policy=compat32):
        """Create an empty message.

        policy controls header parsing and serialization behavior;
        compat32 emulates the Python 3.2 email package.
        """
        self.policy = policy
        self._headers = list()
        self._unixfrom = None
        self._payload = None
        self._charset = None
        # Defaults for multipart messages
        self.preamble = self.epilogue = None
        self.defects = []
        # Default content type
        self._default_type = 'text/plain'
+
    @as_native_str(encoding='utf-8')
    def __str__(self):
        """Return the entire formatted message as a string.
        This includes the headers, body, and envelope header.
        """
        # as_native_str makes this return a native str on Python 2 as well.
        return self.as_string()
+
    def as_string(self, unixfrom=False, maxheaderlen=0):
        """Return the entire formatted message as a (unicode) string.
        Optional `unixfrom' when True, means include the Unix From_ envelope
        header.

        This is a convenience method and may not generate the message exactly
        as you intend.  For more flexibility, use the flatten() method of a
        Generator instance.
        """
        # Imported here to avoid a circular import at module load time.
        from future.backports.email.generator import Generator
        fp = StringIO()
        g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
        g.flatten(self, unixfrom=unixfrom)
        return fp.getvalue()
+
    def is_multipart(self):
        """Return True if the message consists of multiple parts."""
        # The payload is a list of Message objects iff this is a container.
        return isinstance(self._payload, list)
+
+ #
+ # Unix From_ line
+ #
    def set_unixfrom(self, unixfrom):
        """Set the Unix From_ (envelope) header line."""
        self._unixfrom = unixfrom
+
    def get_unixfrom(self):
        """Return the Unix From_ (envelope) header line, or None."""
        return self._unixfrom
+
+ #
+ # Payload manipulation.
+ #
+ def attach(self, payload):
+ """Add the given payload to the current payload.
+
+ The current payload will always be a list of objects after this method
+ is called. If you want to set the payload to a scalar object, use
+ set_payload() instead.
+ """
+ if self._payload is None:
+ self._payload = [payload]
+ else:
+ self._payload.append(payload)
+
    def get_payload(self, i=None, decode=False):
        """Return a reference to the payload.

        The payload will either be a list object or a string.  If you mutate
        the list object, you modify the message's payload in place.  Optional
        i returns that index into the payload.

        Optional decode is a flag indicating whether the payload should be
        decoded or not, according to the Content-Transfer-Encoding header
        (default is False).

        When True and the message is not a multipart, the payload will be
        decoded if this header's value is `quoted-printable' or `base64'.  If
        some other encoding is used, or the header is missing, or if the
        payload has bogus data (i.e. bogus base64 or uuencoded data), the
        payload is returned as-is.

        If the message is a multipart and the decode flag is True, then None
        is returned.
        """
        # Here is the logic table for this code, based on the email5.0.0 code:
        #   i     decode  is_multipart  result
        # ------  ------  ------------  ------------------------------
        #  None   True    True          None
        #   i     True    True          None
        #  None   False   True          _payload (a list)
        #   i     False   True          _payload element i (a Message)
        #   i     False   False         error (not a list)
        #   i     True    False         error (not a list)
        #  None   False   False         _payload
        #  None   True    False         _payload decoded (bytes)
        # Note that Barry planned to factor out the 'decode' case, but that
        # isn't so easy now that we handle the 8 bit data, which needs to be
        # converted in both the decode and non-decode path.
        if self.is_multipart():
            if decode:
                return None
            if i is None:
                return self._payload
            else:
                return self._payload[i]
        # For backward compatibility, Use isinstance and this error message
        # instead of the more logical is_multipart test.
        if i is not None and not isinstance(self._payload, list):
            raise TypeError('Expected list, got %s' % type(self._payload))
        payload = self._payload
        # cte might be a Header, so for now stringify it.
        cte = str(self.get('content-transfer-encoding', '')).lower()
        # payload may be bytes here.
        if isinstance(payload, str):
            payload = str(payload)    # for Python-Future, so surrogateescape works
            if utils._has_surrogates(payload):
                # Surrogates mean the original bytes were non-ASCII; recover
                # them, then re-decode using the declared charset when not
                # decoding the transfer encoding.
                bpayload = payload.encode('ascii', 'surrogateescape')
                if not decode:
                    try:
                        payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
                    except LookupError:
                        # Unknown charset name: fall back to ASCII.
                        payload = bpayload.decode('ascii', 'replace')
            elif decode:
                try:
                    bpayload = payload.encode('ascii')
                except UnicodeError:
                    # This won't happen for RFC compliant messages (messages
                    # containing only ASCII codepoints in the unicode input).
                    # If it does happen, turn the string into bytes in a way
                    # guaranteed not to fail.
                    bpayload = payload.encode('raw-unicode-escape')
        if not decode:
            return payload
        if cte == 'quoted-printable':
            return utils._qdecode(bpayload)
        elif cte == 'base64':
            # XXX: this is a bit of a hack; decode_b should probably be factored
            # out somewhere, but I haven't figured out where yet.
            value, defects = decode_b(b''.join(bpayload.splitlines()))
            for defect in defects:
                self.policy.handle_defect(self, defect)
            return value
        elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            in_file = BytesIO(bpayload)
            out_file = BytesIO()
            try:
                uu.decode(in_file, out_file, quiet=True)
                return out_file.getvalue()
            except uu.Error:
                # Some decoding problem
                return bpayload
        # Unknown CTE: return the raw bytes (str payloads were converted
        # to bpayload above).
        if isinstance(payload, str):
            return bpayload
        return payload
+
    def set_payload(self, payload, charset=None):
        """Set the payload to the given value.

        Optional charset sets the message's default character set.  See
        set_charset() for details.
        """
        self._payload = payload
        if charset is not None:
            self.set_charset(charset)
+
    def set_charset(self, charset):
        """Set the charset of the payload to a given character set.

        charset can be a Charset instance, a string naming a character set, or
        None.  If it is a string it will be converted to a Charset instance.
        If charset is None, the charset parameter will be removed from the
        Content-Type field.  Anything else will generate a TypeError.

        The message will be assumed to be of type text/* encoded with
        charset.input_charset.  It will be converted to charset.output_charset
        and encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (MIME-Version,
        Content-Type, Content-Transfer-Encoding) will be added as needed.
        """
        if charset is None:
            self.del_param('charset')
            self._charset = None
            return
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        if 'MIME-Version' not in self:
            self.add_header('MIME-Version', '1.0')
        if 'Content-Type' not in self:
            self.add_header('Content-Type', 'text/plain',
                            charset=charset.get_output_charset())
        else:
            self.set_param('charset', charset.get_output_charset())
        # Charset.__eq__ compares against the string form, so this checks
        # whether the payload must be converted to the output charset.
        if charset != charset.get_output_charset():
            self._payload = charset.body_encode(self._payload)
        if 'Content-Transfer-Encoding' not in self:
            cte = charset.get_body_encoding()
            try:
                # cte may be a callable that encodes the message in place
                # and sets the header itself.
                cte(self)
            except TypeError:
                # Otherwise cte is the encoding name; encode the payload
                # and record the header ourselves.
                self._payload = charset.body_encode(self._payload)
                self.add_header('Content-Transfer-Encoding', cte)
+
    def get_charset(self):
        """Return the Charset instance associated with the message's payload.
        """
        return self._charset
+
+ #
+ # MAPPING INTERFACE (partial)
+ #
    def __len__(self):
        """Return the total number of headers, including duplicates."""
        return len(self._headers)
+
    def __getitem__(self, name):
        """Get a header value.

        Return None if the header is missing instead of raising an exception.

        Note that if the header appeared multiple times, exactly which
        occurrence gets returned is undefined.  Use get_all() to get all
        the values matching a header field name.
        """
        return self.get(name)
+
    def __setitem__(self, name, val):
        """Set the value of a header.

        Note: this does not overwrite an existing header with the same field
        name.  Use __delitem__() first to delete any existing headers.

        Raises ValueError if the policy limits the number of occurrences of
        this header and the limit is already reached.
        """
        max_count = self.policy.header_max_count(name)
        if max_count:
            lname = name.lower()
            found = 0
            for k, v in self._headers:
                if k.lower() == lname:
                    found += 1
                    if found >= max_count:
                        raise ValueError("There may be at most {} {} headers "
                                         "in a message".format(max_count, name))
        self._headers.append(self.policy.header_store_parse(name, val))
+
+ def __delitem__(self, name):
+ """Delete all occurrences of a header, if present.
+
+ Does not raise an exception if the header is missing.
+ """
+ name = name.lower()
+ newheaders = list()
+ for k, v in self._headers:
+ if k.lower() != name:
+ newheaders.append((k, v))
+ self._headers = newheaders
+
+ def __contains__(self, name):
+ return name.lower() in [k.lower() for k, v in self._headers]
+
+ def __iter__(self):
+ for field, value in self._headers:
+ yield field
+
+ def keys(self):
+ """Return a list of all the message's header field names.
+
+ These will be sorted in the order they appeared in the original
+ message, or were added to the message, and may contain duplicates.
+ Any fields deleted and re-inserted are always appended to the header
+ list.
+ """
+ return [k for k, v in self._headers]
+
    def values(self):
        """Return a list of all the message's header values.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.

        Values are passed through the policy's header_fetch_parse hook.
        """
        return [self.policy.header_fetch_parse(k, v)
                for k, v in self._headers]
+
    def items(self):
        """Get all the message's header fields and values.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.

        Values are passed through the policy's header_fetch_parse hook.
        """
        return [(k, self.policy.header_fetch_parse(k, v))
                for k, v in self._headers]
+
    def get(self, name, failobj=None):
        """Get a header value.

        Like __getitem__() but return failobj instead of None when the field
        is missing.  Matching is case-insensitive; the first occurrence wins.
        """
        name = name.lower()
        for k, v in self._headers:
            if k.lower() == name:
                return self.policy.header_fetch_parse(k, v)
        return failobj
+
+ #
+ # "Internal" methods (public API, but only intended for use by a parser
+ # or generator, not normal application code.
+ #
+
    def set_raw(self, name, value):
        """Store name and value in the model without modification.

        This is an "internal" API, intended only for use by a parser.
        Bypasses the policy's header_store_parse hook entirely.
        """
        self._headers.append((name, value))
+
    def raw_items(self):
        """Return the (name, value) header pairs without modification.

        This is an "internal" API, intended only for use by a generator.
        Iterates a copy so concurrent mutation of the message is safe.
        """
        return iter(self._headers.copy())
+
+ #
+ # Additional useful stuff
+ #
+
    def get_all(self, name, failobj=None):
        """Return a list of all the values for the named field.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.

        If no such fields exist, failobj is returned (defaults to None).
        """
        values = []
        name = name.lower()
        for k, v in self._headers:
            if k.lower() == name:
                values.append(self.policy.header_fetch_parse(k, v))
        if not values:
            return failobj
        return values
+
    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        name is the header field to add.  keyword arguments can be used to set
        additional parameters for the header field, with underscores converted
        to dashes.  Normally the parameter will be added as key="value" unless
        value is None, in which case only the key will be added.  If a
        parameter value contains non-ASCII characters it can be specified as a
        three-tuple of (charset, language, value), in which case it will be
        encoded according to RFC2231 rules.  Otherwise it will be encoded using
        the utf-8 charset and a language of ''.

        Examples:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
        msg.add_header('content-disposition', 'attachment',
                       filename=('utf-8', '', 'Fußballer.ppt'))
        msg.add_header('content-disposition', 'attachment',
                       filename='Fußballer.ppt'))
        """
        parts = []
        for k, v in _params.items():
            if v is None:
                # Bare parameter: just the name, no '=value'.
                parts.append(k.replace('_', '-'))
            else:
                parts.append(_formatparam(k.replace('_', '-'), v))
        if _value is not None:
            parts.insert(0, _value)
        self[_name] = SEMISPACE.join(parts)
+
+ def replace_header(self, _name, _value):
+ """Replace a header.
+
+ Replace the first matching header found in the message, retaining
+ header order and case. If no matching header was found, a KeyError is
+ raised.
+ """
+ _name = _name.lower()
+ for i, (k, v) in zip(range(len(self._headers)), self._headers):
+ if k.lower() == _name:
+ self._headers[i] = self.policy.header_store_parse(k, _value)
+ break
+ else:
+ raise KeyError(_name)
+
+ #
+ # Use these three methods instead of the three above.
+ #
+
    def get_content_type(self):
        """Return the message's content type.

        The returned string is coerced to lower case of the form
        `maintype/subtype'.  If there was no Content-Type header in the
        message, the default type as given by get_default_type() will be
        returned.  Since according to RFC 2045, messages always have a default
        type this will always return a value.

        RFC 2045 defines a message's default type to be text/plain unless it
        appears inside a multipart/digest container, in which case it would be
        message/rfc822.
        """
        # Sentinel distinguishes "header absent" from a header whose value
        # happens to be falsy.
        missing = object()
        value = self.get('content-type', missing)
        if value is missing:
            # This should have no parameters
            return self.get_default_type()
        ctype = _splitparam(value)[0].lower()
        # RFC 2045, section 5.2 says if its invalid, use text/plain
        if ctype.count('/') != 1:
            return 'text/plain'
        return ctype
+
+ def get_content_maintype(self):
+ """Return the message's main content type.
+
+ This is the `maintype' part of the string returned by
+ get_content_type().
+ """
+ ctype = self.get_content_type()
+ return ctype.split('/')[0]
+
+ def get_content_subtype(self):
+ """Returns the message's sub-content type.
+
+ This is the `subtype' part of the string returned by
+ get_content_type().
+ """
+ ctype = self.get_content_type()
+ return ctype.split('/')[1]
+
    def get_default_type(self):
        """Return the `default' content type.

        Most messages have a default content type of text/plain, except for
        messages that are subparts of multipart/digest containers.  Such
        subparts have a default content type of message/rfc822.
        """
        return self._default_type
+
    def set_default_type(self, ctype):
        """Set the `default' content type.

        ctype should be either "text/plain" or "message/rfc822", although this
        is not enforced.  The default content type is not stored in the
        Content-Type header.
        """
        self._default_type = ctype
+
    def _get_params_preserve(self, failobj, header):
        # Like get_params() but preserves the quoting of values.  BAW:
        # should this be part of the public interface?
        missing = object()
        value = self.get(header, missing)
        if value is missing:
            return failobj
        params = []
        for p in _parseparam(value):
            try:
                name, val = p.split('=', 1)
                name = name.strip()
                val = val.strip()
            except ValueError:
                # Must have been a bare attribute
                name = p.strip()
                val = ''
            params.append((name, val))
        # decode_params handles RFC 2231 continuation/charset encoding.
        params = utils.decode_params(params)
        return params
+
    def get_params(self, failobj=None, header='content-type', unquote=True):
        """Return the message's Content-Type parameters, as a list.

        The elements of the returned list are 2-tuples of key/value pairs, as
        split on the `=' sign.  The left hand side of the `=' is the key,
        while the right hand side is the value.  If there is no `=' sign in
        the parameter the value is the empty string.  The value is as
        described in the get_param() method.

        Optional failobj is the object to return if there is no Content-Type
        header.  Optional header is the header to search instead of
        Content-Type.  If unquote is True, the value is unquoted.
        """
        missing = object()
        params = self._get_params_preserve(missing, header)
        if params is missing:
            return failobj
        if unquote:
            return [(k, _unquotevalue(v)) for k, v in params]
        else:
            return params
+
    def get_param(self, param, failobj=None, header='content-type',
                  unquote=True):
        """Return the parameter value if found in the Content-Type header.

        Optional failobj is the object to return if there is no Content-Type
        header, or the Content-Type header has no such parameter.  Optional
        header is the header to search instead of Content-Type.

        Parameter keys are always compared case insensitively.  The return
        value can either be a string, or a 3-tuple if the parameter was RFC
        2231 encoded.  When it's a 3-tuple, the elements of the value are of
        the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
        LANGUAGE can be None, in which case you should consider VALUE to be
        encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
        The parameter value (either the returned string, or the VALUE item in
        the 3-tuple) is always unquoted, unless unquote is set to False.

        If your application doesn't care whether the parameter was RFC 2231
        encoded, it can turn the return value into a string as follows:

            param = msg.get_param('foo')
            param = email.utils.collapse_rfc2231_value(rawparam)

        """
        if header not in self:
            return failobj
        # First case-insensitive key match wins.
        for k, v in self._get_params_preserve(failobj, header):
            if k.lower() == param.lower():
                if unquote:
                    return _unquotevalue(v)
                else:
                    return v
        return failobj
+
    def set_param(self, param, value, header='Content-Type', requote=True,
                  charset=None, language=''):
        """Set a parameter in the Content-Type header.

        If the parameter already exists in the header, its value will be
        replaced with the new value.

        If header is Content-Type and has not yet been defined for this
        message, it will be set to "text/plain" and the new parameter and
        value will be appended as per RFC 2045.

        An alternate header can specified in the header argument, and all
        parameters will be quoted as necessary unless requote is False.

        If charset is specified, the parameter will be encoded according to RFC
        2231.  Optional language specifies the RFC 2231 language, defaulting
        to the empty string.  Both charset and language should be strings.
        """
        if not isinstance(value, tuple) and charset:
            # Promote to the RFC 2231 (charset, language, value) form.
            value = (charset, language, value)

        if header not in self and header.lower() == 'content-type':
            ctype = 'text/plain'
        else:
            ctype = self.get(header)
        if not self.get_param(param, header=header):
            # Parameter not present yet: append it.
            if not ctype:
                ctype = _formatparam(param, value, requote)
            else:
                ctype = SEMISPACE.join(
                    [ctype, _formatparam(param, value, requote)])
        else:
            # Parameter exists: rebuild the header value, substituting the
            # new value while preserving the order of the other params.
            ctype = ''
            for old_param, old_value in self.get_params(header=header,
                                                        unquote=requote):
                append_param = ''
                if old_param.lower() == param.lower():
                    append_param = _formatparam(param, value, requote)
                else:
                    append_param = _formatparam(old_param, old_value, requote)
                if not ctype:
                    ctype = append_param
                else:
                    ctype = SEMISPACE.join([ctype, append_param])
        if ctype != self.get(header):
            # Replace the header only when the value actually changed.
            del self[header]
            self[header] = ctype
+
+    def del_param(self, param, header='content-type', requote=True):
+        """Remove the given parameter completely from the Content-Type header.
+
+        The header will be re-written in place without the parameter or its
+        value. All values will be quoted as necessary unless requote is
+        False. Optional header specifies an alternative to the Content-Type
+        header.
+        """
+        if header not in self:
+            return
+        new_ctype = ''
+        # Rebuild the header value from every parameter except the one
+        # being deleted, preserving the original parameter order.
+        for p, v in self.get_params(header=header, unquote=requote):
+            if p.lower() != param.lower():
+                if not new_ctype:
+                    new_ctype = _formatparam(p, v, requote)
+                else:
+                    new_ctype = SEMISPACE.join([new_ctype,
+                                                _formatparam(p, v, requote)])
+        if new_ctype != self.get(header):
+            del self[header]
+            self[header] = new_ctype
+
+    def set_type(self, type, header='Content-Type', requote=True):
+        """Set the main type and subtype for the Content-Type header.
+
+        type must be a string in the form "maintype/subtype", otherwise a
+        ValueError is raised.
+
+        This method replaces the Content-Type header, keeping all the
+        parameters in place. If requote is False, this leaves the existing
+        header's quoting as is. Otherwise, the parameters will be quoted (the
+        default).
+
+        An alternative header can be specified in the header argument. When
+        the Content-Type header is set, we'll always also add a MIME-Version
+        header.
+        """
+        # BAW: should we be strict?
+        if not type.count('/') == 1:
+            raise ValueError
+        # Set the Content-Type, you get a MIME-Version
+        if header.lower() == 'content-type':
+            # Delete any existing MIME-Version first to avoid duplicates.
+            del self['mime-version']
+            self['MIME-Version'] = '1.0'
+        if header not in self:
+            self[header] = type
+            return
+        # Capture the existing parameters so they survive the replacement.
+        params = self.get_params(header=header, unquote=requote)
+        del self[header]
+        self[header] = type
+        # Skip the first param; it's the old type.
+        for p, v in params[1:]:
+            self.set_param(p, v, header, requote)
+
+    def get_filename(self, failobj=None):
+        """Return the filename associated with the payload if present.
+
+        The filename is extracted from the Content-Disposition header's
+        `filename' parameter, and it is unquoted. If that header is missing
+        the `filename' parameter, this method falls back to looking for the
+        `name' parameter.
+        """
+        # Unique sentinel: distinguishes "parameter absent" from a stored
+        # value that happens to equal failobj.
+        missing = object()
+        filename = self.get_param('filename', missing, 'content-disposition')
+        if filename is missing:
+            # Fall back to the Content-Type `name' parameter.
+            filename = self.get_param('name', missing, 'content-type')
+        if filename is missing:
+            return failobj
+        return utils.collapse_rfc2231_value(filename).strip()
+
+    def get_boundary(self, failobj=None):
+        """Return the boundary associated with the payload if present.
+
+        The boundary is extracted from the Content-Type header's `boundary'
+        parameter, and it is unquoted.
+        """
+        # Sentinel so a stored value equal to failobj is not mistaken for
+        # "no boundary parameter present".
+        missing = object()
+        boundary = self.get_param('boundary', missing)
+        if boundary is missing:
+            return failobj
+        # RFC 2046 says that boundaries may begin but not end in w/s
+        return utils.collapse_rfc2231_value(boundary).rstrip()
+
+    def set_boundary(self, boundary):
+        """Set the boundary parameter in Content-Type to 'boundary'.
+
+        This is subtly different than deleting the Content-Type header and
+        adding a new one with a new boundary parameter via add_header(). The
+        main difference is that using the set_boundary() method preserves the
+        order of the Content-Type header in the original message.
+
+        HeaderParseError is raised if the message has no Content-Type header.
+        """
+        missing = object()
+        params = self._get_params_preserve(missing, 'content-type')
+        if params is missing:
+            # There was no Content-Type header, and we don't know what type
+            # to set it to, so raise an exception.
+            raise errors.HeaderParseError('No Content-Type header found')
+        newparams = list()
+        foundp = False
+        # Replace an existing boundary parameter in place, keeping the
+        # relative order of all other parameters.
+        for pk, pv in params:
+            if pk.lower() == 'boundary':
+                newparams.append(('boundary', '"%s"' % boundary))
+                foundp = True
+            else:
+                newparams.append((pk, pv))
+        if not foundp:
+            # The original Content-Type header had no boundary attribute.
+            # Tack one on the end. BAW: should we raise an exception
+            # instead???
+            newparams.append(('boundary', '"%s"' % boundary))
+        # Replace the existing Content-Type header with the new value
+        newheaders = list()
+        for h, v in self._headers:
+            if h.lower() == 'content-type':
+                # Re-serialize the parameter list; a bare key (empty value)
+                # is emitted without the '=' sign.
+                parts = list()
+                for k, v in newparams:
+                    if v == '':
+                        parts.append(k)
+                    else:
+                        parts.append('%s=%s' % (k, v))
+                val = SEMISPACE.join(parts)
+                newheaders.append(self.policy.header_store_parse(h, val))
+
+            else:
+                newheaders.append((h, v))
+        self._headers = newheaders
+
+    def get_content_charset(self, failobj=None):
+        """Return the charset parameter of the Content-Type header.
+
+        The returned string is always coerced to lower case. If there is no
+        Content-Type header, or if that header has no charset parameter,
+        failobj is returned.
+        """
+        # Sentinel distinguishes "parameter absent" from any real value.
+        missing = object()
+        charset = self.get_param('charset', missing)
+        if charset is missing:
+            return failobj
+        if isinstance(charset, tuple):
+            # RFC 2231 encoded, so decode it, and it better end up as ascii.
+            pcharset = charset[0] or 'us-ascii'
+            try:
+                # LookupError will be raised if the charset isn't known to
+                # Python. UnicodeError will be raised if the encoded text
+                # contains a character not in the charset.
+                as_bytes = charset[2].encode('raw-unicode-escape')
+                charset = str(as_bytes, pcharset)
+            except (LookupError, UnicodeError):
+                # Fall back to the raw VALUE item of the 3-tuple.
+                charset = charset[2]
+        # charset characters must be in us-ascii range
+        try:
+            charset.encode('us-ascii')
+        except UnicodeError:
+            return failobj
+        # RFC 2046, $4.1.2 says charsets are not case sensitive
+        return charset.lower()
+
+    def get_charsets(self, failobj=None):
+        """Return a list containing the charset(s) used in this message.
+
+        The returned list of items describes the Content-Type headers'
+        charset parameter for this message and all the subparts in its
+        payload.
+
+        Each item will either be a string (the value of the charset parameter
+        in the Content-Type header of that part) or the value of the
+        'failobj' parameter (defaults to None), if the part does not have a
+        main MIME type of "text", or the charset is not defined.
+
+        The list will contain one string for each part of the message, plus
+        one for the container message (i.e. self), so that a non-multipart
+        message will still return a list of length 1.
+        """
+        # One entry per part visited by walk() (the container included).
+        return [part.get_content_charset(failobj) for part in self.walk()]
+
+ # I.e. def walk(self): ...
+ from future.backports.email.iterators import walk
diff --git a/future/standard_library/http/__init__.py b/src/future/backports/email/mime/__init__.py
similarity index 100%
rename from future/standard_library/http/__init__.py
rename to src/future/backports/email/mime/__init__.py
diff --git a/src/future/backports/email/mime/application.py b/src/future/backports/email/mime/application.py
new file mode 100644
index 00000000..5cbfb174
--- /dev/null
+++ b/src/future/backports/email/mime/application.py
@@ -0,0 +1,39 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Keith Dart
+# Contact: email-sig@python.org
+
+"""Class representing application/* type MIME documents."""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
+from future.backports.email import encoders
+from future.backports.email.mime.nonmultipart import MIMENonMultipart
+
+__all__ = ["MIMEApplication"]
+
+
+class MIMEApplication(MIMENonMultipart):
+    """Class for generating application/* MIME documents."""
+
+    def __init__(self, _data, _subtype='octet-stream',
+                 _encoder=encoders.encode_base64, **_params):
+        """Create an application/* type MIME document.
+
+        _data is a string containing the raw application data.
+
+        _subtype is the MIME content type subtype, defaulting to
+        'octet-stream'.
+
+        _encoder is a function which will perform the actual encoding for
+        transport of the application data, defaulting to base64 encoding.
+
+        Any additional keyword arguments are passed to the base class
+        constructor, which turns them into parameters on the Content-Type
+        header.
+        """
+        # Explicitly passing _subtype=None is an error: unlike the audio or
+        # image classes, there is no way to guess an application subtype.
+        if _subtype is None:
+            raise TypeError('Invalid application MIME subtype')
+        MIMENonMultipart.__init__(self, 'application', _subtype, **_params)
+        self.set_payload(_data)
+        # Encode the payload in place for transport.
+        _encoder(self)
diff --git a/src/future/backports/email/mime/audio.py b/src/future/backports/email/mime/audio.py
new file mode 100644
index 00000000..4989c114
--- /dev/null
+++ b/src/future/backports/email/mime/audio.py
@@ -0,0 +1,74 @@
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Anthony Baxter
+# Contact: email-sig@python.org
+
+"""Class representing audio/* type MIME documents."""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
+__all__ = ['MIMEAudio']
+
+import sndhdr
+
+from io import BytesIO
+from future.backports.email import encoders
+from future.backports.email.mime.nonmultipart import MIMENonMultipart
+
+
+# Mapping from sndhdr.what() type names to their audio/* MIME subtypes.
+_sndhdr_MIMEmap = {'au' : 'basic',
+                   'wav' :'x-wav',
+                   'aiff':'x-aiff',
+                   'aifc':'x-aiff',
+                   }
+
+# There are others in sndhdr that don't have MIME types. :(
+# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma??
+def _whatsnd(data):
+    """Try to identify a sound file type.
+
+    sndhdr.what() has a pretty cruddy interface, unfortunately. This is why
+    we re-do it here. It would be easier to reverse engineer the Unix 'file'
+    command and use the standard 'magic' file, as shipped with a modern Unix.
+    """
+    # The first 512 bytes are enough for sndhdr's signature tests.
+    # NOTE(review): sndhdr is deprecated and removed in Python 3.13+.
+    hdr = data[:512]
+    fakefile = BytesIO(hdr)
+    for testfn in sndhdr.tests:
+        res = testfn(hdr, fakefile)
+        if res is not None:
+            # res[0] is sndhdr's type name; map it to a MIME subtype
+            # (None when we have no mapping for it).
+            return _sndhdr_MIMEmap.get(res[0])
+    return None
+
+
+class MIMEAudio(MIMENonMultipart):
+    """Class for generating audio/* MIME documents."""
+
+    def __init__(self, _audiodata, _subtype=None,
+                 _encoder=encoders.encode_base64, **_params):
+        """Create an audio/* type MIME document.
+
+        _audiodata is a string containing the raw audio data. If this data
+        can be decoded by the standard Python `sndhdr' module, then the
+        subtype will be automatically included in the Content-Type header.
+        Otherwise, you can specify the specific audio subtype via the
+        _subtype parameter. If _subtype is not given, and no subtype can be
+        guessed, a TypeError is raised.
+
+        _encoder is a function which will perform the actual encoding for
+        transport of the image data. It takes one argument, which is this
+        Image instance. It should use get_payload() and set_payload() to
+        change the payload to the encoded form. It should also add any
+        Content-Transfer-Encoding or other headers to the message as
+        necessary. The default encoding is Base64.
+
+        Any additional keyword arguments are passed to the base class
+        constructor, which turns them into parameters on the Content-Type
+        header.
+        """
+        if _subtype is None:
+            # Try to sniff the subtype from the data itself.
+            _subtype = _whatsnd(_audiodata)
+        if _subtype is None:
+            raise TypeError('Could not find audio MIME subtype')
+        MIMENonMultipart.__init__(self, 'audio', _subtype, **_params)
+        self.set_payload(_audiodata)
+        # Encode the payload in place for transport.
+        _encoder(self)
diff --git a/src/future/backports/email/mime/base.py b/src/future/backports/email/mime/base.py
new file mode 100644
index 00000000..e77f3ca4
--- /dev/null
+++ b/src/future/backports/email/mime/base.py
@@ -0,0 +1,25 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Base class for MIME specializations."""
+from __future__ import absolute_import, division, unicode_literals
+from future.backports.email import message
+
+__all__ = ['MIMEBase']
+
+
+class MIMEBase(message.Message):
+    """Base class for MIME specializations."""
+
+    def __init__(self, _maintype, _subtype, **_params):
+        """This constructor adds a Content-Type: and a MIME-Version: header.
+
+        The Content-Type: header is taken from the _maintype and _subtype
+        arguments. Additional parameters for this header are taken from the
+        keyword arguments.
+        """
+        message.Message.__init__(self)
+        ctype = '%s/%s' % (_maintype, _subtype)
+        # add_header() turns the keyword arguments into header parameters.
+        self.add_header('Content-Type', ctype, **_params)
+        self['MIME-Version'] = '1.0'
diff --git a/src/future/backports/email/mime/image.py b/src/future/backports/email/mime/image.py
new file mode 100644
index 00000000..a0360246
--- /dev/null
+++ b/src/future/backports/email/mime/image.py
@@ -0,0 +1,48 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Class representing image/* type MIME documents."""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
+__all__ = ['MIMEImage']
+
+import imghdr
+
+from future.backports.email import encoders
+from future.backports.email.mime.nonmultipart import MIMENonMultipart
+
+
+class MIMEImage(MIMENonMultipart):
+    """Class for generating image/* type MIME documents."""
+
+    def __init__(self, _imagedata, _subtype=None,
+                 _encoder=encoders.encode_base64, **_params):
+        """Create an image/* type MIME document.
+
+        _imagedata is a string containing the raw image data. If this data
+        can be decoded by the standard Python `imghdr' module, then the
+        subtype will be automatically included in the Content-Type header.
+        Otherwise, you can specify the specific image subtype via the _subtype
+        parameter.
+
+        _encoder is a function which will perform the actual encoding for
+        transport of the image data. It takes one argument, which is this
+        Image instance. It should use get_payload() and set_payload() to
+        change the payload to the encoded form. It should also add any
+        Content-Transfer-Encoding or other headers to the message as
+        necessary. The default encoding is Base64.
+
+        Any additional keyword arguments are passed to the base class
+        constructor, which turns them into parameters on the Content-Type
+        header.
+        """
+        if _subtype is None:
+            # NOTE(review): imghdr is deprecated and removed in Python 3.13+;
+            # a fallback may be needed for newer interpreters.
+            _subtype = imghdr.what(None, _imagedata)
+        if _subtype is None:
+            raise TypeError('Could not guess image MIME subtype')
+        MIMENonMultipart.__init__(self, 'image', _subtype, **_params)
+        self.set_payload(_imagedata)
+        # Encode the payload in place for transport.
+        _encoder(self)
diff --git a/src/future/backports/email/mime/message.py b/src/future/backports/email/mime/message.py
new file mode 100644
index 00000000..7f920751
--- /dev/null
+++ b/src/future/backports/email/mime/message.py
@@ -0,0 +1,36 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Class representing message/* MIME documents."""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
+__all__ = ['MIMEMessage']
+
+from future.backports.email import message
+from future.backports.email.mime.nonmultipart import MIMENonMultipart
+
+
+class MIMEMessage(MIMENonMultipart):
+    """Class representing message/* MIME documents."""
+
+    def __init__(self, _msg, _subtype='rfc822'):
+        """Create a message/* type MIME document.
+
+        _msg is a message object and must be an instance of Message, or a
+        derived class of Message, otherwise a TypeError is raised.
+
+        Optional _subtype defines the subtype of the contained message. The
+        default is "rfc822" (this is defined by the MIME standard, even though
+        the term "rfc822" is technically outdated by RFC 2822).
+        """
+        MIMENonMultipart.__init__(self, 'message', _subtype)
+        if not isinstance(_msg, message.Message):
+            raise TypeError('Argument is not an instance of Message')
+        # Call the Message base class's attach() directly: our own attach()
+        # (inherited from MIMENonMultipart) raises MultipartConversionError.
+        message.Message.attach(self, _msg)
+        # And be sure our default type is set correctly
+        self.set_default_type('message/rfc822')
diff --git a/src/future/backports/email/mime/multipart.py b/src/future/backports/email/mime/multipart.py
new file mode 100644
index 00000000..6d7ed3dc
--- /dev/null
+++ b/src/future/backports/email/mime/multipart.py
@@ -0,0 +1,49 @@
+# Copyright (C) 2002-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Base class for MIME multipart/* type messages."""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
+__all__ = ['MIMEMultipart']
+
+from future.backports.email.mime.base import MIMEBase
+
+
+class MIMEMultipart(MIMEBase):
+    """Base class for MIME multipart/* type messages."""
+
+    def __init__(self, _subtype='mixed', boundary=None, _subparts=None,
+                 **_params):
+        """Creates a multipart/* type message.
+
+        By default, creates a multipart/mixed message, with proper
+        Content-Type and MIME-Version headers.
+
+        _subtype is the subtype of the multipart content type, defaulting to
+        `mixed'.
+
+        boundary is the multipart boundary string. By default it is
+        calculated as needed.
+
+        _subparts is a sequence of initial subparts for the payload. It
+        must be an iterable object, such as a list. You can always
+        attach new subparts to the message by using the attach() method.
+
+        Additional parameters for the Content-Type header are taken from the
+        keyword arguments (or passed into the _params argument).
+        """
+        MIMEBase.__init__(self, 'multipart', _subtype, **_params)
+
+        # Initialise _payload to an empty list as the Message superclass's
+        # implementation of is_multipart assumes that _payload is a list for
+        # multipart messages.
+        self._payload = []
+
+        if _subparts:
+            for p in _subparts:
+                self.attach(p)
+        if boundary:
+            # set_boundary() rewrites the Content-Type header's boundary
+            # parameter in place.
+            self.set_boundary(boundary)
diff --git a/src/future/backports/email/mime/nonmultipart.py b/src/future/backports/email/mime/nonmultipart.py
new file mode 100644
index 00000000..08c37c36
--- /dev/null
+++ b/src/future/backports/email/mime/nonmultipart.py
@@ -0,0 +1,24 @@
+# Copyright (C) 2002-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Base class for MIME type messages that are not multipart."""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
+__all__ = ['MIMENonMultipart']
+
+from future.backports.email import errors
+from future.backports.email.mime.base import MIMEBase
+
+
+class MIMENonMultipart(MIMEBase):
+    """Base class for MIME type messages that are not multipart."""
+
+    def attach(self, payload):
+        # The public API prohibits attaching multiple subparts to MIMEBase
+        # derived subtypes since none of them are, by definition, of content
+        # type multipart/*
+        raise errors.MultipartConversionError(
+            'Cannot attach additional subparts to non-multipart/*')
diff --git a/src/future/backports/email/mime/text.py b/src/future/backports/email/mime/text.py
new file mode 100644
index 00000000..6269f4a6
--- /dev/null
+++ b/src/future/backports/email/mime/text.py
@@ -0,0 +1,44 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Class representing text/* type MIME documents."""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
+__all__ = ['MIMEText']
+
+from future.backports.email.encoders import encode_7or8bit
+from future.backports.email.mime.nonmultipart import MIMENonMultipart
+
+
+class MIMEText(MIMENonMultipart):
+    """Class for generating text/* type MIME documents."""
+
+    def __init__(self, _text, _subtype='plain', _charset=None):
+        """Create a text/* type MIME document.
+
+        _text is the string for this message object.
+
+        _subtype is the MIME sub content type, defaulting to "plain".
+
+        _charset is the character set parameter added to the Content-Type
+        header. This defaults to "us-ascii". Note that as a side-effect, the
+        Content-Transfer-Encoding header will also be set.
+        """
+
+        # If no _charset was specified, check to see if there are non-ascii
+        # characters present. If not, use 'us-ascii', otherwise use utf-8.
+        # XXX: This can be removed once #7304 is fixed.
+        if _charset is None:
+            try:
+                _text.encode('us-ascii')
+                _charset = 'us-ascii'
+            except UnicodeEncodeError:
+                _charset = 'utf-8'
+
+        # The charset becomes a parameter on the Content-Type header.
+        MIMENonMultipart.__init__(self, 'text', _subtype,
+                                  **{'charset': _charset})
+
+        self.set_payload(_text, _charset)
diff --git a/src/future/backports/email/parser.py b/src/future/backports/email/parser.py
new file mode 100644
index 00000000..79f0e5a3
--- /dev/null
+++ b/src/future/backports/email/parser.py
@@ -0,0 +1,135 @@
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
+# Contact: email-sig@python.org
+
+"""A parser of RFC 2822 and MIME email messages."""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
+__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser']
+
+import warnings
+from io import StringIO, TextIOWrapper
+
+from future.backports.email.feedparser import FeedParser, BytesFeedParser
+from future.backports.email.message import Message
+from future.backports.email._policybase import compat32
+
+
+class Parser(object):
+    def __init__(self, _class=Message, **_3to2kwargs):
+        """Parser of RFC 2822 and MIME email messages.
+
+        Creates an in-memory object tree representing the email message, which
+        can then be manipulated and turned over to a Generator to return the
+        textual representation of the message.
+
+        The string must be formatted as a block of RFC 2822 headers and header
+        continuation lines, optionally preceded by a `Unix-from' header. The
+        header block is terminated either by the end of the string or by a
+        blank line.
+
+        _class is the class to instantiate for new message objects when they
+        must be created. This class must have a constructor that can take
+        zero arguments. Default is Message.Message.
+
+        The policy keyword specifies a policy object that controls a number of
+        aspects of the parser's operation. The default policy maintains
+        backward compatibility.
+
+        """
+        # Emulate Python 3's keyword-only 'policy' argument (the _3to2kwargs
+        # dance comes from the automated Py3->Py2 conversion of this file).
+        if 'policy' in _3to2kwargs: policy = _3to2kwargs['policy']; del _3to2kwargs['policy']
+        else: policy = compat32
+        self._class = _class
+        self.policy = policy
+
+    def parse(self, fp, headersonly=False):
+        """Create a message structure from the data in a file.
+
+        Reads all the data from the file and returns the root of the message
+        structure. Optional headersonly is a flag specifying whether to stop
+        parsing after reading the headers or not. The default is False,
+        meaning it parses the entire contents of the file.
+        """
+        feedparser = FeedParser(self._class, policy=self.policy)
+        if headersonly:
+            feedparser._set_headersonly()
+        # Feed the file to the incremental parser in fixed-size chunks.
+        while True:
+            data = fp.read(8192)
+            if not data:
+                break
+            feedparser.feed(data)
+        return feedparser.close()
+
+    def parsestr(self, text, headersonly=False):
+        """Create a message structure from a string.
+
+        Returns the root of the message structure. Optional headersonly is a
+        flag specifying whether to stop parsing after reading the headers or
+        not. The default is False, meaning it parses the entire contents of
+        the file.
+        """
+        return self.parse(StringIO(text), headersonly=headersonly)
+
+
+
+class HeaderParser(Parser):
+    """A Parser that parses only the message headers.
+
+    Both methods force headersonly=True; the headersonly parameter is
+    accepted only for signature compatibility with Parser.
+    """
+    def parse(self, fp, headersonly=True):
+        return Parser.parse(self, fp, True)
+
+    def parsestr(self, text, headersonly=True):
+        return Parser.parsestr(self, text, True)
+
+
+class BytesParser(object):
+
+    def __init__(self, *args, **kw):
+        """Parser of binary RFC 2822 and MIME email messages.
+
+        Creates an in-memory object tree representing the email message, which
+        can then be manipulated and turned over to a Generator to return the
+        textual representation of the message.
+
+        The input must be formatted as a block of RFC 2822 headers and header
+        continuation lines, optionally preceded by a `Unix-from' header. The
+        header block is terminated either by the end of the input or by a
+        blank line.
+
+        _class is the class to instantiate for new message objects when they
+        must be created. This class must have a constructor that can take
+        zero arguments. Default is Message.Message.
+        """
+        # Delegate all actual parsing to a text-mode Parser over decoded input.
+        self.parser = Parser(*args, **kw)
+
+    def parse(self, fp, headersonly=False):
+        """Create a message structure from the data in a binary file.
+
+        Reads all the data from the file and returns the root of the message
+        structure. Optional headersonly is a flag specifying whether to stop
+        parsing after reading the headers or not. The default is False,
+        meaning it parses the entire contents of the file.
+        """
+        # surrogateescape lets arbitrary bytes round-trip through str.
+        fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
+        with fp:
+            return self.parser.parse(fp, headersonly)
+
+
+    def parsebytes(self, text, headersonly=False):
+        """Create a message structure from a byte string.
+
+        Returns the root of the message structure. Optional headersonly is a
+        flag specifying whether to stop parsing after reading the headers or
+        not. The default is False, meaning it parses the entire contents of
+        the file.
+        """
+        # Same surrogateescape trick as parse(), applied to a byte string.
+        text = text.decode('ASCII', errors='surrogateescape')
+        return self.parser.parsestr(text, headersonly)
+
+
+class BytesHeaderParser(BytesParser):
+    """A BytesParser that parses only the message headers.
+
+    Both methods force headersonly=True; the headersonly parameter is
+    accepted only for signature compatibility with BytesParser.
+    """
+    def parse(self, fp, headersonly=True):
+        return BytesParser.parse(self, fp, headersonly=True)
+
+    def parsebytes(self, text, headersonly=True):
+        return BytesParser.parsebytes(self, text, headersonly=True)
diff --git a/src/future/backports/email/policy.py b/src/future/backports/email/policy.py
new file mode 100644
index 00000000..2f609a23
--- /dev/null
+++ b/src/future/backports/email/policy.py
@@ -0,0 +1,193 @@
+"""This will be the home for the policy that hooks in the new
+code that adds all the email6 features.
+"""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import super
+
+from future.standard_library.email._policybase import (Policy, Compat32,
+ compat32, _extend_docstrings)
+from future.standard_library.email.utils import _has_surrogates
+from future.standard_library.email.headerregistry import HeaderRegistry as HeaderRegistry
+
+__all__ = [
+ 'Compat32',
+ 'compat32',
+ 'Policy',
+ 'EmailPolicy',
+ 'default',
+ 'strict',
+ 'SMTP',
+ 'HTTP',
+ ]
+
+@_extend_docstrings
+class EmailPolicy(Policy):
+
+    """+
+    PROVISIONAL
+
+    The API extensions enabled by this policy are currently provisional.
+    Refer to the documentation for details.
+
+    This policy adds new header parsing and folding algorithms. Instead of
+    simple strings, headers are custom objects with custom attributes
+    depending on the type of the field. The folding algorithm fully
+    implements RFCs 2047 and 5322.
+
+    In addition to the settable attributes listed above that apply to
+    all Policies, this policy adds the following additional attributes:
+
+    refold_source -- if the value for a header in the Message object
+                     came from the parsing of some source, this attribute
+                     indicates whether or not a generator should refold
+                     that value when transforming the message back into
+                     stream form. The possible values are:
+
+                     none -- all source values use original folding
+                     long -- source values that have any line that is
+                             longer than max_line_length will be
+                             refolded
+                     all -- all values are refolded.
+
+                     The default is 'long'.
+
+    header_factory -- a callable that takes two arguments, 'name' and
+                      'value', where 'name' is a header field name and
+                      'value' is an unfolded header field value, and
+                      returns a string-like object that represents that
+                      header. A default header_factory is provided that
+                      understands some of the RFC5322 header field types.
+                      (Currently address fields and date fields have
+                      special treatment, while all other fields are
+                      treated as unstructured. This list will be
+                      completed before the extension is marked stable.)
+    """
+
+    # Class-level defaults; __init__ gives each instance its own registry.
+    refold_source = 'long'
+    header_factory = HeaderRegistry()
+
+    def __init__(self, **kw):
+        # Ensure that each new instance gets a unique header factory
+        # (as opposed to clones, which share the factory).
+        if 'header_factory' not in kw:
+            object.__setattr__(self, 'header_factory', HeaderRegistry())
+        super().__init__(**kw)
+
+    def header_max_count(self, name):
+        """+
+        The implementation for this class returns the max_count attribute from
+        the specialized header class that would be used to construct a header
+        of type 'name'.
+        """
+        return self.header_factory[name].max_count
+
+    # The logic of the next three methods is chosen such that it is possible to
+    # switch a Message object between a Compat32 policy and a policy derived
+    # from this class and have the results stay consistent. This allows a
+    # Message object constructed with this policy to be passed to a library
+    # that only handles Compat32 objects, or to receive such an object and
+    # convert it to use the newer style by just changing its policy. It is
+    # also chosen because it postpones the relatively expensive full rfc5322
+    # parse until as late as possible when parsing from source, since in many
+    # applications only a few headers will actually be inspected.
+
+    def header_source_parse(self, sourcelines):
+        """+
+        The name is parsed as everything up to the ':' and returned unmodified.
+        The value is determined by stripping leading whitespace off the
+        remainder of the first line, joining all subsequent lines together, and
+        stripping any trailing carriage return or linefeed characters. (This
+        is the same as Compat32).
+
+        """
+        name, value = sourcelines[0].split(':', 1)
+        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
+        return (name, value.rstrip('\r\n'))
+
+    def header_store_parse(self, name, value):
+        """+
+        The name is returned unchanged. If the input value has a 'name'
+        attribute and it matches the name ignoring case, the value is returned
+        unchanged. Otherwise the name and value are passed to header_factory
+        method, and the resulting custom header object is returned as the
+        value. In this case a ValueError is raised if the input value contains
+        CR or LF characters.
+
+        """
+        if hasattr(value, 'name') and value.name.lower() == name.lower():
+            return (name, value)
+        # Guard against header injection via embedded CR/LF.
+        if isinstance(value, str) and len(value.splitlines())>1:
+            raise ValueError("Header values may not contain linefeed "
+                             "or carriage return characters")
+        return (name, self.header_factory(name, value))
+
+    def header_fetch_parse(self, name, value):
+        """+
+        If the value has a 'name' attribute, it is returned to unmodified.
+        Otherwise the name and the value with any linesep characters removed
+        are passed to the header_factory method, and the resulting custom
+        header object is returned. Any surrogateescaped bytes get turned
+        into the unicode unknown-character glyph.
+
+        """
+        if hasattr(value, 'name'):
+            return value
+        return self.header_factory(name, ''.join(value.splitlines()))
+
+    def fold(self, name, value):
+        """+
+        Header folding is controlled by the refold_source policy setting. A
+        value is considered to be a 'source value' if and only if it does not
+        have a 'name' attribute (having a 'name' attribute means it is a header
+        object of some sort). If a source value needs to be refolded according
+        to the policy, it is converted into a custom header object by passing
+        the name and the value with any linesep characters removed to the
+        header_factory method. Folding of a custom header object is done by
+        calling its fold method with the current policy.
+
+        Source values are split into lines using splitlines. If the value is
+        not to be refolded, the lines are rejoined using the linesep from the
+        policy and returned. The exception is lines containing non-ascii
+        binary data. In that case the value is refolded regardless of the
+        refold_source setting, which causes the binary data to be CTE encoded
+        using the unknown-8bit charset.
+
+        """
+        return self._fold(name, value, refold_binary=True)
+
+    def fold_binary(self, name, value):
+        """+
+        The same as fold if cte_type is 7bit, except that the returned value is
+        bytes.
+
+        If cte_type is 8bit, non-ASCII binary data is converted back into
+        bytes. Headers with binary data are not refolded, regardless of the
+        refold_header setting, since there is no way to know whether the binary
+        data consists of single byte characters or multibyte characters.
+
+        """
+        folded = self._fold(name, value, refold_binary=self.cte_type=='7bit')
+        return folded.encode('ascii', 'surrogateescape')
+
+    def _fold(self, name, value, refold_binary=False):
+        # Header objects (anything with a .name attribute) fold themselves.
+        if hasattr(value, 'name'):
+            return value.fold(policy=self)
+        maxlen = self.max_line_length if self.max_line_length else float('inf')
+        lines = value.splitlines()
+        # Refold when the policy demands it, or ('long') when any existing
+        # line -- counting the 'name: ' overhead on the first -- is too long.
+        refold = (self.refold_source == 'all' or
+                  self.refold_source == 'long' and
+                  (lines and len(lines[0])+len(name)+2 > maxlen or
+                   any(len(x) > maxlen for x in lines[1:])))
+        if refold or refold_binary and _has_surrogates(value):
+            return self.header_factory(name, ''.join(lines)).fold(policy=self)
+        return name + ': ' + self.linesep.join(lines) + self.linesep
+
+
+# Ready-to-use policy instances.
+default = EmailPolicy()
+# Make the default policy use the class default header_factory
+del default.header_factory
+strict = default.clone(raise_on_defect=True)
+SMTP = default.clone(linesep='\r\n')
+HTTP = default.clone(linesep='\r\n', max_line_length=None)
diff --git a/src/future/backports/email/quoprimime.py b/src/future/backports/email/quoprimime.py
new file mode 100644
index 00000000..b69d158b
--- /dev/null
+++ b/src/future/backports/email/quoprimime.py
@@ -0,0 +1,326 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Ben Gertzfield
+# Contact: email-sig@python.org
+
+"""Quoted-printable content transfer encoding per RFCs 2045-2047.
+
+This module handles the content transfer encoding method defined in RFC 2045
+to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to
+safely encode text that is in a character set similar to the 7-bit US ASCII
+character set, but that includes some 8-bit characters that are normally not
+allowed in email bodies or headers.
+
+Quoted-printable is very space-inefficient for encoding binary files; use the
+email.base64mime module for that instead.
+
+This module provides an interface to encode and decode both headers and bodies
+with quoted-printable encoding.
+
+RFC 2045 defines a method for including character set information in an
+`encoded-word' in a header. This method is commonly used for 8-bit real names
+in To:/From:/Cc: etc. fields, as well as Subject: lines.
+
+This module does not do the line wrapping or end-of-line character
+conversion necessary for proper internationalized headers; it only
+does dumb encoding and decoding. To deal with the various line
+wrapping issues, use the email.header module.
+"""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import bytes, chr, dict, int, range, super
+
+__all__ = [
+ 'body_decode',
+ 'body_encode',
+ 'body_length',
+ 'decode',
+ 'decodestring',
+ 'header_decode',
+ 'header_encode',
+ 'header_length',
+ 'quote',
+ 'unquote',
+ ]
+
+import re
+import io
+
+from string import ascii_letters, digits, hexdigits
+
+CRLF = '\r\n'
+NL = '\n'
+EMPTYSTRING = ''
+
+# Build a mapping of octets to the expansion of that octet. Since we're only
+# going to have 256 of these things, this isn't terribly inefficient
+# space-wise. Remember that headers and bodies have different sets of safe
+# characters. Initialize both maps with the full expansion, and then override
+# the safe bytes with the more compact form.
+_QUOPRI_HEADER_MAP = dict((c, '=%02X' % c) for c in range(256))
+_QUOPRI_BODY_MAP = _QUOPRI_HEADER_MAP.copy()
+
+# Safe header bytes which need no encoding.
+for c in bytes(b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii')):
+ _QUOPRI_HEADER_MAP[c] = chr(c)
+# Headers have one other special encoding; spaces become underscores.
+_QUOPRI_HEADER_MAP[ord(' ')] = '_'
+
+# Safe body bytes which need no encoding.
+for c in bytes(b' !"#$%&\'()*+,-./0123456789:;<>'
+ b'?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`'
+ b'abcdefghijklmnopqrstuvwxyz{|}~\t'):
+ _QUOPRI_BODY_MAP[c] = chr(c)
+
+
+
+# Helpers
+def header_check(octet):
+ """Return True if the octet should be escaped with header quopri."""
+ return chr(octet) != _QUOPRI_HEADER_MAP[octet]
+
+
+def body_check(octet):
+ """Return True if the octet should be escaped with body quopri."""
+ return chr(octet) != _QUOPRI_BODY_MAP[octet]
+
+
+def header_length(bytearray):
+ """Return a header quoted-printable encoding length.
+
+ Note that this does not include any RFC 2047 chrome added by
+ `header_encode()`.
+
+ :param bytearray: An array of bytes (a.k.a. octets).
+ :return: The length in bytes of the byte array when it is encoded with
+ quoted-printable for headers.
+ """
+ return sum(len(_QUOPRI_HEADER_MAP[octet]) for octet in bytearray)
+
+
+def body_length(bytearray):
+ """Return a body quoted-printable encoding length.
+
+ :param bytearray: An array of bytes (a.k.a. octets).
+ :return: The length in bytes of the byte array when it is encoded with
+ quoted-printable for bodies.
+ """
+ return sum(len(_QUOPRI_BODY_MAP[octet]) for octet in bytearray)
+
+
+def _max_append(L, s, maxlen, extra=''):
+ if not isinstance(s, str):
+ s = chr(s)
+ if not L:
+ L.append(s.lstrip())
+ elif len(L[-1]) + len(s) <= maxlen:
+ L[-1] += extra + s
+ else:
+ L.append(s.lstrip())
+
+
+def unquote(s):
+ """Turn a string in the form =AB to the ASCII character with value 0xab"""
+ return chr(int(s[1:3], 16))
+
+
+def quote(c):
+ return '=%02X' % ord(c)
+
+
+
+def header_encode(header_bytes, charset='iso-8859-1'):
+ """Encode a single header line with quoted-printable (like) encoding.
+
+ Defined in RFC 2045, this `Q' encoding is similar to quoted-printable, but
+ used specifically for email header fields to allow charsets with mostly 7
+ bit characters (and some 8 bit) to remain more or less readable in non-RFC
+ 2045 aware mail clients.
+
+ charset names the character set to use in the RFC 2046 header. It
+ defaults to iso-8859-1.
+ """
+ # Return empty headers as an empty string.
+ if not header_bytes:
+ return ''
+ # Iterate over every byte, encoding if necessary.
+ encoded = []
+ for octet in header_bytes:
+ encoded.append(_QUOPRI_HEADER_MAP[octet])
+ # Now add the RFC chrome to each encoded chunk and glue the chunks
+ # together.
+ return '=?%s?q?%s?=' % (charset, EMPTYSTRING.join(encoded))
+
+
+class _body_accumulator(io.StringIO):
+
+ def __init__(self, maxlinelen, eol, *args, **kw):
+ super().__init__(*args, **kw)
+ self.eol = eol
+ self.maxlinelen = self.room = maxlinelen
+
+ def write_str(self, s):
+ """Add string s to the accumulated body."""
+ self.write(s)
+ self.room -= len(s)
+
+ def newline(self):
+ """Write eol, then start new line."""
+ self.write_str(self.eol)
+ self.room = self.maxlinelen
+
+ def write_soft_break(self):
+ """Write a soft break, then start a new line."""
+ self.write_str('=')
+ self.newline()
+
+ def write_wrapped(self, s, extra_room=0):
+ """Add a soft line break if needed, then write s."""
+ if self.room < len(s) + extra_room:
+ self.write_soft_break()
+ self.write_str(s)
+
+ def write_char(self, c, is_last_char):
+ if not is_last_char:
+ # Another character follows on this line, so we must leave
+ # extra room, either for it or a soft break, and whitespace
+ # need not be quoted.
+ self.write_wrapped(c, extra_room=1)
+ elif c not in ' \t':
+ # For this and remaining cases, no more characters follow,
+ # so there is no need to reserve extra room (since a hard
+ # break will immediately follow).
+ self.write_wrapped(c)
+ elif self.room >= 3:
+ # It's a whitespace character at end-of-line, and we have room
+ # for the three-character quoted encoding.
+ self.write(quote(c))
+ elif self.room == 2:
+ # There's room for the whitespace character and a soft break.
+ self.write(c)
+ self.write_soft_break()
+ else:
+ # There's room only for a soft break. The quoted whitespace
+ # will be the only content on the subsequent line.
+ self.write_soft_break()
+ self.write(quote(c))
+
+
+def body_encode(body, maxlinelen=76, eol=NL):
+ """Encode with quoted-printable, wrapping at maxlinelen characters.
+
+ Each line of encoded text will end with eol, which defaults to "\\n". Set
+ this to "\\r\\n" if you will be using the result of this function directly
+ in an email.
+
+ Each line will be wrapped at, at most, maxlinelen characters before the
+ eol string (maxlinelen defaults to 76 characters, the maximum value
+ permitted by RFC 2045). Long lines will have the 'soft line break'
+ quoted-printable character "=" appended to them, so the decoded text will
+ be identical to the original text.
+
+ The minimum maxlinelen is 4 to have room for a quoted character ("=XX")
+ followed by a soft line break. Smaller values will generate a
+ ValueError.
+
+ """
+
+ if maxlinelen < 4:
+ raise ValueError("maxlinelen must be at least 4")
+ if not body:
+ return body
+
+ # The last line may or may not end in eol, but all other lines do.
+ last_has_eol = (body[-1] in '\r\n')
+
+ # This accumulator will make it easier to build the encoded body.
+ encoded_body = _body_accumulator(maxlinelen, eol)
+
+ lines = body.splitlines()
+ last_line_no = len(lines) - 1
+ for line_no, line in enumerate(lines):
+ last_char_index = len(line) - 1
+ for i, c in enumerate(line):
+ if body_check(ord(c)):
+ c = quote(c)
+ encoded_body.write_char(c, i==last_char_index)
+ # Add an eol if input line had eol. All input lines have eol except
+ # possibly the last one.
+ if line_no < last_line_no or last_has_eol:
+ encoded_body.newline()
+
+ return encoded_body.getvalue()
+
+
+
+# BAW: I'm not sure if the intent was for the signature of this function to be
+# the same as base64MIME.decode() or not...
+def decode(encoded, eol=NL):
+ """Decode a quoted-printable string.
+
+ Lines are separated with eol, which defaults to \\n.
+ """
+ if not encoded:
+ return encoded
+ # BAW: see comment in encode() above. Again, we're building up the
+ # decoded string with string concatenation, which could be done much more
+ # efficiently.
+ decoded = ''
+
+ for line in encoded.splitlines():
+ line = line.rstrip()
+ if not line:
+ decoded += eol
+ continue
+
+ i = 0
+ n = len(line)
+ while i < n:
+ c = line[i]
+ if c != '=':
+ decoded += c
+ i += 1
+ # Otherwise, c == "=". Are we at the end of the line? If so, add
+ # a soft line break.
+ elif i+1 == n:
+ i += 1
+ continue
+ # Decode if in form =AB
+ elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits:
+ decoded += unquote(line[i:i+3])
+ i += 3
+ # Otherwise, not in form =AB, pass literally
+ else:
+ decoded += c
+ i += 1
+
+ if i == n:
+ decoded += eol
+ # Special case if original string did not end with eol
+ if encoded[-1] not in '\r\n' and decoded.endswith(eol):
+ decoded = decoded[:-1]
+ return decoded
+
+
+# For convenience and backwards compatibility w/ standard base64 module
+body_decode = decode
+decodestring = decode
+
+
+
+def _unquote_match(match):
+ """Turn a match in the form =AB to the ASCII character with value 0xab"""
+ s = match.group(0)
+ return unquote(s)
+
+
+# Header decoding is done a bit differently
+def header_decode(s):
+ """Decode a string encoded with RFC 2045 MIME header `Q' encoding.
+
+ This function does not parse a full MIME header value encoded with
+ quoted-printable (like =?iso-8895-1?q?Hello_World?=) -- please use
+ the high level email.header class for that functionality.
+ """
+ s = s.replace('_', ' ')
+ return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s, re.ASCII)
diff --git a/src/future/backports/email/utils.py b/src/future/backports/email/utils.py
new file mode 100644
index 00000000..4abebf7c
--- /dev/null
+++ b/src/future/backports/email/utils.py
@@ -0,0 +1,400 @@
+# Copyright (C) 2001-2010 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Miscellaneous utilities."""
+
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future import utils
+from future.builtins import bytes, int, str
+
+__all__ = [
+ 'collapse_rfc2231_value',
+ 'decode_params',
+ 'decode_rfc2231',
+ 'encode_rfc2231',
+ 'formataddr',
+ 'formatdate',
+ 'format_datetime',
+ 'getaddresses',
+ 'make_msgid',
+ 'mktime_tz',
+ 'parseaddr',
+ 'parsedate',
+ 'parsedate_tz',
+ 'parsedate_to_datetime',
+ 'unquote',
+ ]
+
+import os
+import re
+if utils.PY2:
+ re.ASCII = 0
+import time
+import base64
+import random
+import socket
+from future.backports import datetime
+from future.backports.urllib.parse import quote as url_quote, unquote as url_unquote
+import warnings
+from io import StringIO
+
+from future.backports.email._parseaddr import quote
+from future.backports.email._parseaddr import AddressList as _AddressList
+from future.backports.email._parseaddr import mktime_tz
+
+from future.backports.email._parseaddr import parsedate, parsedate_tz, _parsedate_tz
+
+from quopri import decodestring as _qdecode
+
+# Intrapackage imports
+from future.backports.email.encoders import _bencode, _qencode
+from future.backports.email.charset import Charset
+
+COMMASPACE = ', '
+EMPTYSTRING = ''
+UEMPTYSTRING = ''
+CRLF = '\r\n'
+TICK = "'"
+
+specialsre = re.compile(r'[][\\()<>@,:;".]')
+escapesre = re.compile(r'[\\"]')
+
+# How to figure out if we are processing strings that come from a byte
+# source with undecodable characters.
+_has_surrogates = re.compile(
+ '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search
+
+# How to deal with a string containing bytes before handing it to the
+# application through the 'normal' interface.
+def _sanitize(string):
+ # Turn any escaped bytes into unicode 'unknown' char.
+ original_bytes = string.encode('ascii', 'surrogateescape')
+ return original_bytes.decode('ascii', 'replace')
+
+
+# Helpers
+
+def formataddr(pair, charset='utf-8'):
+ """The inverse of parseaddr(), this takes a 2-tuple of the form
+ (realname, email_address) and returns the string value suitable
+ for an RFC 2822 From, To or Cc header.
+
+ If the first element of pair is false, then the second element is
+ returned unmodified.
+
+ Optional charset if given is the character set that is used to encode
+ realname in case realname is not ASCII safe. Can be an instance of str or
+ a Charset-like object which has a header_encode method. Default is
+ 'utf-8'.
+ """
+ name, address = pair
+ # The address MUST (per RFC) be ascii, so raise an UnicodeError if it isn't.
+ address.encode('ascii')
+ if name:
+ try:
+ name.encode('ascii')
+ except UnicodeEncodeError:
+ if isinstance(charset, str):
+ charset = Charset(charset)
+ encoded_name = charset.header_encode(name)
+ return "%s <%s>" % (encoded_name, address)
+ else:
+ quotes = ''
+ if specialsre.search(name):
+ quotes = '"'
+ name = escapesre.sub(r'\\\g<0>', name)
+ return '%s%s%s <%s>' % (quotes, name, quotes, address)
+ return address
+
+
+
+def getaddresses(fieldvalues):
+ """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
+ all = COMMASPACE.join(fieldvalues)
+ a = _AddressList(all)
+ return a.addresslist
+
+
+
+ecre = re.compile(r'''
+ =\? # literal =?
+ (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
+ \? # literal ?
+ (?P<encoding>[qb]) # either a "q" or a "b", case insensitive
+ \? # literal ?
+ (?P<atom>.*?) # non-greedy up to the next ?= is the atom
+ \?= # literal ?=
+ ''', re.VERBOSE | re.IGNORECASE)
+
+
+def _format_timetuple_and_zone(timetuple, zone):
+ return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
+ ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][timetuple[6]],
+ timetuple[2],
+ ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+ 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][timetuple[1] - 1],
+ timetuple[0], timetuple[3], timetuple[4], timetuple[5],
+ zone)
+
+def formatdate(timeval=None, localtime=False, usegmt=False):
+ """Returns a date string as specified by RFC 2822, e.g.:
+
+ Fri, 09 Nov 2001 01:08:47 -0000
+
+ Optional timeval if given is a floating point time value as accepted by
+ gmtime() and localtime(), otherwise the current time is used.
+
+ Optional localtime is a flag that when True, interprets timeval, and
+ returns a date relative to the local timezone instead of UTC, properly
+ taking daylight savings time into account.
+
+ Optional argument usegmt means that the timezone is written out as
+ an ascii string, not numeric one (so "GMT" instead of "+0000"). This
+ is needed for HTTP, and is only used when localtime==False.
+ """
+ # Note: we cannot use strftime() because that honors the locale and RFC
+ # 2822 requires that day and month names be the English abbreviations.
+ if timeval is None:
+ timeval = time.time()
+ if localtime:
+ now = time.localtime(timeval)
+ # Calculate timezone offset, based on whether the local zone has
+ # daylight savings time, and whether DST is in effect.
+ if time.daylight and now[-1]:
+ offset = time.altzone
+ else:
+ offset = time.timezone
+ hours, minutes = divmod(abs(offset), 3600)
+ # Remember offset is in seconds west of UTC, but the timezone is in
+ # minutes east of UTC, so the signs differ.
+ if offset > 0:
+ sign = '-'
+ else:
+ sign = '+'
+ zone = '%s%02d%02d' % (sign, hours, minutes // 60)
+ else:
+ now = time.gmtime(timeval)
+ # Timezone offset is always -0000
+ if usegmt:
+ zone = 'GMT'
+ else:
+ zone = '-0000'
+ return _format_timetuple_and_zone(now, zone)
+
+def format_datetime(dt, usegmt=False):
+ """Turn a datetime into a date string as specified in RFC 2822.
+
+ If usegmt is True, dt must be an aware datetime with an offset of zero. In
+ this case 'GMT' will be rendered instead of the normal +0000 required by
+ RFC2822. This is to support HTTP headers involving date stamps.
+ """
+ now = dt.timetuple()
+ if usegmt:
+ if dt.tzinfo is None or dt.tzinfo != datetime.timezone.utc:
+ raise ValueError("usegmt option requires a UTC datetime")
+ zone = 'GMT'
+ elif dt.tzinfo is None:
+ zone = '-0000'
+ else:
+ zone = dt.strftime("%z")
+ return _format_timetuple_and_zone(now, zone)
+
+
+def make_msgid(idstring=None, domain=None):
+ """Returns a string suitable for RFC 2822 compliant Message-ID, e.g:
+
+ <20020201195627.33539.96671@nightshade.la.mastaler.com>
+
+ Optional idstring if given is a string used to strengthen the
+ uniqueness of the message id. Optional domain if given provides the
+ portion of the message id after the '@'. It defaults to the locally
+ defined hostname.
+ """
+ timeval = time.time()
+ utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval))
+ pid = os.getpid()
+ randint = random.randrange(100000)
+ if idstring is None:
+ idstring = ''
+ else:
+ idstring = '.' + idstring
+ if domain is None:
+ domain = socket.getfqdn()
+ msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, domain)
+ return msgid
+
+
+def parsedate_to_datetime(data):
+ _3to2list = list(_parsedate_tz(data))
+ dtuple, tz, = [_3to2list[:-1]] + _3to2list[-1:]
+ if tz is None:
+ return datetime.datetime(*dtuple[:6])
+ return datetime.datetime(*dtuple[:6],
+ tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
+
+
+def parseaddr(addr):
+ addrs = _AddressList(addr).addresslist
+ if not addrs:
+ return '', ''
+ return addrs[0]
+
+
+# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
+def unquote(str):
+ """Remove quotes from a string."""
+ if len(str) > 1:
+ if str.startswith('"') and str.endswith('"'):
+ return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
+ if str.startswith('<') and str.endswith('>'):
+ return str[1:-1]
+ return str
+
+
+
+# RFC2231-related functions - parameter encoding and decoding
+def decode_rfc2231(s):
+ """Decode string according to RFC 2231"""
+ parts = s.split(TICK, 2)
+ if len(parts) <= 2:
+ return None, None, s
+ return parts
+
+
+def encode_rfc2231(s, charset=None, language=None):
+ """Encode string according to RFC 2231.
+
+ If neither charset nor language is given, then s is returned as-is. If
+ charset is given but not language, the string is encoded using the empty
+ string for language.
+ """
+ s = url_quote(s, safe='', encoding=charset or 'ascii')
+ if charset is None and language is None:
+ return s
+ if language is None:
+ language = ''
+ return "%s'%s'%s" % (charset, language, s)
+
+
+rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$',
+ re.ASCII)
+
+def decode_params(params):
+ """Decode parameters list according to RFC 2231.
+
+ params is a sequence of 2-tuples containing (param name, string value).
+ """
+ # Copy params so we don't mess with the original
+ params = params[:]
+ new_params = []
+ # Map parameter's name to a list of continuations. The values are a
+ # 3-tuple of the continuation number, the string value, and a flag
+ # specifying whether a particular segment is %-encoded.
+ rfc2231_params = {}
+ name, value = params.pop(0)
+ new_params.append((name, value))
+ while params:
+ name, value = params.pop(0)
+ if name.endswith('*'):
+ encoded = True
+ else:
+ encoded = False
+ value = unquote(value)
+ mo = rfc2231_continuation.match(name)
+ if mo:
+ name, num = mo.group('name', 'num')
+ if num is not None:
+ num = int(num)
+ rfc2231_params.setdefault(name, []).append((num, value, encoded))
+ else:
+ new_params.append((name, '"%s"' % quote(value)))
+ if rfc2231_params:
+ for name, continuations in rfc2231_params.items():
+ value = []
+ extended = False
+ # Sort by number
+ continuations.sort()
+ # And now append all values in numerical order, converting
+ # %-encodings for the encoded segments. If any of the
+ # continuation names ends in a *, then the entire string, after
+ # decoding segments and concatenating, must have the charset and
+ # language specifiers at the beginning of the string.
+ for num, s, encoded in continuations:
+ if encoded:
+ # Decode as "latin-1", so the characters in s directly
+ # represent the percent-encoded octet values.
+ # collapse_rfc2231_value treats this as an octet sequence.
+ s = url_unquote(s, encoding="latin-1")
+ extended = True
+ value.append(s)
+ value = quote(EMPTYSTRING.join(value))
+ if extended:
+ charset, language, value = decode_rfc2231(value)
+ new_params.append((name, (charset, language, '"%s"' % value)))
+ else:
+ new_params.append((name, '"%s"' % value))
+ return new_params
+
+def collapse_rfc2231_value(value, errors='replace',
+ fallback_charset='us-ascii'):
+ if not isinstance(value, tuple) or len(value) != 3:
+ return unquote(value)
+ # While value comes to us as a unicode string, we need it to be a bytes
+ # object. We do not want bytes() normal utf-8 decoder, we want a straight
+ # interpretation of the string as character bytes.
+ charset, language, text = value
+ rawbytes = bytes(text, 'raw-unicode-escape')
+ try:
+ return str(rawbytes, charset, errors)
+ except LookupError:
+ # charset is not a known codec.
+ return unquote(text)
+
+
+#
+# datetime doesn't provide a localtime function yet, so provide one. Code
+# adapted from the patch in issue 9527. This may not be perfect, but it is
+# better than not having it.
+#
+
+def localtime(dt=None, isdst=-1):
+ """Return local time as an aware datetime object.
+
+ If called without arguments, return current time. Otherwise *dt*
+ argument should be a datetime instance, and it is converted to the
+ local time zone according to the system time zone database. If *dt* is
+ naive (that is, dt.tzinfo is None), it is assumed to be in local time.
+ In this case, a positive or zero value for *isdst* causes localtime to
+ presume initially that summer time (for example, Daylight Saving Time)
+ is or is not (respectively) in effect for the specified time. A
+ negative value for *isdst* causes the localtime() function to attempt
+ to divine whether summer time is in effect for the specified time.
+
+ """
+ if dt is None:
+ return datetime.datetime.now(datetime.timezone.utc).astimezone()
+ if dt.tzinfo is not None:
+ return dt.astimezone()
+ # We have a naive datetime. Convert to a (localtime) timetuple and pass to
+ # system mktime together with the isdst hint. System mktime will return
+ # seconds since epoch.
+ tm = dt.timetuple()[:-1] + (isdst,)
+ seconds = time.mktime(tm)
+ localtm = time.localtime(seconds)
+ try:
+ delta = datetime.timedelta(seconds=localtm.tm_gmtoff)
+ tz = datetime.timezone(delta, localtm.tm_zone)
+ except AttributeError:
+ # Compute UTC offset and compare with the value implied by tm_isdst.
+ # If the values match, use the zone name implied by tm_isdst.
+ delta = dt - datetime.datetime(*time.gmtime(seconds)[:6])
+ dst = time.daylight and localtm.tm_isdst > 0
+ gmtoff = -(time.altzone if dst else time.timezone)
+ if delta == datetime.timedelta(seconds=gmtoff):
+ tz = datetime.timezone(delta, time.tzname[dst])
+ else:
+ tz = datetime.timezone(delta)
+ return dt.replace(tzinfo=tz)
diff --git a/future/standard_library/html/__init__.py b/src/future/backports/html/__init__.py
similarity index 99%
rename from future/standard_library/html/__init__.py
rename to src/future/backports/html/__init__.py
index 837afce1..58e133fd 100644
--- a/future/standard_library/html/__init__.py
+++ b/src/future/backports/html/__init__.py
@@ -25,4 +25,3 @@ def escape(s, quote=True):
if quote:
return s.translate(_escape_map_full)
return s.translate(_escape_map)
-
diff --git a/future/standard_library/html/entities.py b/src/future/backports/html/entities.py
similarity index 99%
rename from future/standard_library/html/entities.py
rename to src/future/backports/html/entities.py
index 84b63ddf..5c73f692 100644
--- a/future/standard_library/html/entities.py
+++ b/src/future/backports/html/entities.py
@@ -5,7 +5,6 @@
from __future__ import (absolute_import, division,
print_function, unicode_literals)
-from future import standard_library
from future.builtins import *
@@ -2513,4 +2512,3 @@
entitydefs[name] = chr(codepoint)
del name, codepoint
-
diff --git a/future/standard_library/html/parser.py b/src/future/backports/html/parser.py
similarity index 99%
rename from future/standard_library/html/parser.py
rename to src/future/backports/html/parser.py
index 5b398378..fb652636 100644
--- a/future/standard_library/html/parser.py
+++ b/src/future/backports/html/parser.py
@@ -12,10 +12,8 @@
from __future__ import (absolute_import, division,
print_function, unicode_literals)
-from future import standard_library
from future.builtins import *
-
-import _markupbase
+from future.backports import _markupbase
import re
import warnings
@@ -523,7 +521,7 @@ def replaceEntities(s):
except ValueError:
return '' + s
else:
- from html.entities import html5
+ from future.backports.html.entities import html5
if s in html5:
return html5[s]
elif s.endswith(';'):
@@ -536,4 +534,3 @@ def replaceEntities(s):
return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+;|\w{1,32};?))",
replaceEntities, s)
-
diff --git a/future/tests/__init__.py b/src/future/backports/http/__init__.py
similarity index 100%
rename from future/tests/__init__.py
rename to src/future/backports/http/__init__.py
diff --git a/future/standard_library/http/client.py b/src/future/backports/http/client.py
similarity index 88%
rename from future/standard_library/http/client.py
rename to src/future/backports/http/client.py
index 8d7ef0f4..e663d125 100644
--- a/future/standard_library/http/client.py
+++ b/src/future/backports/http/client.py
@@ -1,8 +1,9 @@
"""HTTP/1.1 client library
-A backport of the Python 3.3 module to Python 2.7 using ``future``.
+A backport of the Python 3.3 http/client.py module for python-future.
---------------
+
+
HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
@@ -69,24 +70,24 @@
from __future__ import (absolute_import, division,
print_function, unicode_literals)
-from future.builtins import *
-from future.utils import isbytes, istext
+from future.builtins import bytes, int, str, super
+from future.utils import PY2
-import email.parser
-import email.message
+from future.backports.email import parser as email_parser
+from future.backports.email import message as email_message
+from future.backports.misc import create_connection as socket_create_connection
import io
import os
import socket
-import collections
-# Not yet backported:
-# from urllib.parse import urlsplit
-# Use the Py2.7 equivalent:
-from urlparse import urlsplit
-
+from future.backports.urllib.parse import urlsplit
import warnings
-import numbers
from array import array
+if PY2:
+ from collections import Iterable
+else:
+ from collections.abc import Iterable
+
__all__ = ["HTTPResponse", "HTTPConnection",
"HTTPException", "NotConnected", "UnknownProtocol",
"UnknownTransferEncoding", "UnimplementedFileMode",
@@ -226,8 +227,10 @@
# maximal line length when calling readline().
_MAXLINE = 65536
+_MAXHEADERS = 100
-class HTTPMessage(email.message.Message, object):
+
+class HTTPMessage(email_message.Message):
# XXX The only usage of this method is in
# http.server.CGIHTTPRequestHandler. Maybe move the code there so
# that it doesn't need to be part of the public API. The API has
@@ -273,16 +276,17 @@ def parse_headers(fp, _class=HTTPMessage):
if len(line) > _MAXLINE:
raise LineTooLong("header line")
headers.append(line)
+ if len(headers) > _MAXHEADERS:
+ raise HTTPException("got more than %d headers" % _MAXHEADERS)
if line in (b'\r\n', b'\n', b''):
break
hstring = bytes(b'').join(headers).decode('iso-8859-1')
- # Try passing it as bytes to Py2.7 email.parser.parsestr
- # which expects a byte-string
- return email.parser.Parser(_class=_class).parsestr(hstring)
+ return email_parser.Parser(_class=_class).parsestr(hstring)
+
_strict_sentinel = object()
-class HTTPResponse(io.RawIOBase, object):
+class HTTPResponse(io.RawIOBase):
# See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
@@ -513,7 +517,7 @@ def read(self, amt=None):
if amt is not None:
# Amount is given, so call base class version
# (which is implemented in terms of self.readinto)
- return super(HTTPResponse, self).read(amt)
+ return bytes(super(HTTPResponse, self).read(amt))
else:
# Amount is not given (unbounded read) so we must check self.length
# and self.chunked
@@ -531,7 +535,7 @@ def read(self, amt=None):
raise
self.length = 0
self._close_conn() # we read everything
- return s
+ return bytes(s)
def readinto(self, b):
if self.fp is None:
@@ -552,8 +556,15 @@ def readinto(self, b):
# we do not use _safe_read() here because this may be a .will_close
# connection, and the user is reading more bytes than will be provided
# (for example, reading in 1k chunks)
- n = self.fp.readinto(b)
- if not n:
+
+ if PY2:
+ data = self.fp.read(len(b))
+ n = len(data)
+ b[:n] = data
+ else:
+ n = self.fp.readinto(b)
+
+ if not n and b:
# Ideally, we would raise IncompleteRead if the content-length
# wasn't satisfied, but it might break compatibility.
self._close_conn()
@@ -604,7 +615,7 @@ def _readall_chunked(self):
if chunk_left == 0:
break
except ValueError:
- raise IncompleteRead(b''.join(value))
+ raise IncompleteRead(bytes(b'').join(value))
value.append(self._safe_read(chunk_left))
# we read the whole chunk, get another
@@ -677,7 +688,7 @@ def _safe_read(self, amt):
while amt > 0:
chunk = self.fp.read(min(amt, MAXAMOUNT))
if not chunk:
- raise IncompleteRead(b''.join(s), amt)
+ raise IncompleteRead(bytes(b'').join(s), amt)
s.append(chunk)
amt -= len(chunk)
return bytes(b"").join(s)
@@ -689,9 +700,19 @@ def _safe_readinto(self, b):
while total_bytes < len(b):
if MAXAMOUNT < len(mvb):
temp_mvb = mvb[0:MAXAMOUNT]
- n = self.fp.readinto(temp_mvb)
+ if PY2:
+ data = self.fp.read(len(temp_mvb))
+ n = len(data)
+ temp_mvb[:n] = data
+ else:
+ n = self.fp.readinto(temp_mvb)
else:
- n = self.fp.readinto(mvb)
+ if PY2:
+ data = self.fp.read(len(mvb))
+ n = len(data)
+ mvb[:n] = data
+ else:
+ n = self.fp.readinto(mvb)
if not n:
raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b))
mvb = mvb[n:]
@@ -705,11 +726,11 @@ def getheader(self, name, default=None):
if self.headers is None:
raise ResponseNotReady()
headers = self.headers.get_all(name) or default
- if istext(headers) or not hasattr(headers, '__iter__'):
+ if isinstance(headers, str) or not hasattr(headers, '__iter__'):
return headers
else:
return ', '.join(headers)
-
+
def getheaders(self):
"""Return list of (header, value) tuples."""
if self.headers is None:
@@ -827,7 +848,7 @@ def _tunnel(self):
def connect(self):
"""Connect to the host and port specified in __init__."""
- self.sock = socket.create_connection((self.host,self.port),
+ self.sock = socket_create_connection((self.host,self.port),
self.timeout, self.source_address)
if self._tunnel_host:
self._tunnel()
@@ -881,11 +902,11 @@ def send(self, data):
if encode:
datablock = datablock.encode("iso-8859-1")
self.sock.sendall(datablock)
-
+ return
try:
self.sock.sendall(data)
except TypeError:
- if isinstance(data, collections.Iterable):
+ if isinstance(data, Iterable):
for d in data:
self.sock.sendall(d)
else:
@@ -911,7 +932,7 @@ def _send_output(self, message_body=None):
# If msg and message_body are sent in a single send() call,
# it will avoid performance problems caused by the interaction
# between delayed ack and the Nagle algorithm.
- if isbytes(message_body):
+ if isinstance(message_body, bytes):
msg += message_body
message_body = None
self.send(msg)
@@ -1005,7 +1026,7 @@ def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
# when used as Host header
if self.host.find(':') >= 0:
- host_enc = b'[' + host_enc + b']'
+ host_enc = bytes(b'[' + host_enc + b']')
if self.port == self.default_port:
self.putheader('Host', host_enc)
@@ -1050,10 +1071,10 @@ def putheader(self, header, *values):
for i, one_value in enumerate(values):
if hasattr(one_value, 'encode'):
values[i] = one_value.encode('latin-1')
- elif isinstance(one_value, numbers.Integral):
+ elif isinstance(one_value, int):
values[i] = str(one_value).encode('ascii')
value = bytes(b'\r\n\t').join(values)
- header = header + b': ' + value
+ header = header + bytes(b': ') + value
self._output(header)
def endheaders(self, message_body=None):
@@ -1107,7 +1128,7 @@ def _send_request(self, method, url, body, headers):
self._set_content_length(body)
for hdr, value in headers.items():
self.putheader(hdr, value)
- if istext(body):
+ if isinstance(body, str):
# RFC 2616 Section 3.7.1 says that text default has a
# default charset of iso-8859-1.
body = body.encode('iso-8859-1')
@@ -1170,38 +1191,97 @@ class the response_class variable.
try:
import ssl
+ from ssl import SSLContext
except ImportError:
pass
else:
- ######################################
- # We use the old HTTPSConnection class from Py2.7, because ssl.SSLContext
- # doesn't exist in the Py2.7 stdlib
class HTTPSConnection(HTTPConnection):
"This class allows communication via SSL."
default_port = HTTPS_PORT
+ # XXX Should key_file and cert_file be deprecated in favour of context?
+
def __init__(self, host, port=None, key_file=None, cert_file=None,
- strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
- source_address=None):
- HTTPConnection.__init__(self, host, port, strict, timeout,
- source_address)
+ strict=_strict_sentinel, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+ source_address=None, **_3to2kwargs):
+ if 'check_hostname' in _3to2kwargs: check_hostname = _3to2kwargs['check_hostname']; del _3to2kwargs['check_hostname']
+ else: check_hostname = None
+ if 'context' in _3to2kwargs: context = _3to2kwargs['context']; del _3to2kwargs['context']
+ else: context = None
+ super(HTTPSConnection, self).__init__(host, port, strict, timeout,
+ source_address)
self.key_file = key_file
self.cert_file = cert_file
+ if context is None:
+ # Some reasonable defaults
+ context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+ context.options |= ssl.OP_NO_SSLv2
+ will_verify = context.verify_mode != ssl.CERT_NONE
+ if check_hostname is None:
+ check_hostname = will_verify
+ elif check_hostname and not will_verify:
+ raise ValueError("check_hostname needs a SSL context with "
+ "either CERT_OPTIONAL or CERT_REQUIRED")
+ if key_file or cert_file:
+ context.load_cert_chain(cert_file, key_file)
+ self._context = context
+ self._check_hostname = check_hostname
def connect(self):
"Connect to a host on a given (SSL) port."
- sock = socket.create_connection((self.host, self.port),
+ sock = socket_create_connection((self.host, self.port),
self.timeout, self.source_address)
+
if self._tunnel_host:
self.sock = sock
self._tunnel()
- self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
+
+ server_hostname = self.host if ssl.HAS_SNI else None
+ self.sock = self._context.wrap_socket(sock,
+ server_hostname=server_hostname)
+ try:
+ if self._check_hostname:
+ ssl.match_hostname(self.sock.getpeercert(), self.host)
+ except Exception:
+ self.sock.shutdown(socket.SHUT_RDWR)
+ self.sock.close()
+ raise
__all__.append("HTTPSConnection")
+ # ######################################
+ # # We use the old HTTPSConnection class from Py2.7, because ssl.SSLContext
+ # # doesn't exist in the Py2.7 stdlib
+ # class HTTPSConnection(HTTPConnection):
+ # "This class allows communication via SSL."
+
+ # default_port = HTTPS_PORT
+
+ # def __init__(self, host, port=None, key_file=None, cert_file=None,
+ # strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+ # source_address=None):
+ # HTTPConnection.__init__(self, host, port, strict, timeout,
+ # source_address)
+ # self.key_file = key_file
+ # self.cert_file = cert_file
+
+ # def connect(self):
+ # "Connect to a host on a given (SSL) port."
+
+ # sock = socket_create_connection((self.host, self.port),
+ # self.timeout, self.source_address)
+ # if self._tunnel_host:
+ # self.sock = sock
+ # self._tunnel()
+ # self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
+
+ # __all__.append("HTTPSConnection")
+ # ######################################
+
+
class HTTPException(Exception):
# Subclasses that define an __init__ must call Exception.__init__
# or define self.args. Otherwise, str() will fail.
diff --git a/future/standard_library/http/cookiejar.py b/src/future/backports/http/cookiejar.py
similarity index 82%
rename from future/standard_library/http/cookiejar.py
rename to src/future/backports/http/cookiejar.py
index 3981ff1f..a39242c0 100644
--- a/future/standard_library/http/cookiejar.py
+++ b/src/future/backports/http/cookiejar.py
@@ -1,7 +1,7 @@
r"""HTTP cookie handling for web clients.
-This is based on the Py3.3 ``http.cookiejar`` module and the Py2.7
-``cookielib`` module.
+This is a backport of the Py3.3 ``http.cookiejar`` module for
+python-future.
This module has (now fairly distant) origins in Gisle Aas' Perl module
HTTP::Cookies, from the libwww-perl library.
@@ -27,9 +27,13 @@
MSIECookieJar
"""
-from __future__ import (absolute_import, division) # , unicode_literals)
-from future import standard_library
-# from future.builtins import *
+
+from __future__ import unicode_literals
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import filter, int, map, open, str
+from future.utils import as_native_str, PY2
__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']
@@ -37,15 +41,15 @@
import copy
import datetime
import re
+if PY2:
+ re.ASCII = 0
import time
-import urlparse, urllib
+from future.backports.urllib.parse import urlparse, urlsplit, quote
+from future.backports.http.client import HTTP_PORT
try:
import threading as _threading
except ImportError:
import dummy_threading as _threading
-# Instead of this: import httplib
-# Import this new-style one:
-import http.client # only for the default HTTP port
from calendar import timegm
debug = False # set to True to enable debugging via the logging module
@@ -61,7 +65,7 @@ def _debug(*args):
return logger.debug(*args)
-DEFAULT_HTTP_PORT = str(http.client.HTTP_PORT)
+DEFAULT_HTTP_PORT = str(HTTP_PORT)
MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
"instance initialised with one)")
@@ -135,7 +139,7 @@ def time2netscape(t=None):
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}
-TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
+TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII)
def offset_from_tz_string(tz):
offset = None
if tz in UTC_ZONES:
@@ -205,9 +209,9 @@ def _str2time(day, mon, yr, hr, min, sec, tz):
STRICT_DATE_RE = re.compile(
r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
- "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
+ "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII)
WEEKDAY_RE = re.compile(
- r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
+ r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII)
LOOSE_HTTP_DATE_RE = re.compile(
r"""^
(\d\d?) # day
@@ -221,10 +225,14 @@ def _str2time(day, mon, yr, hr, min, sec, tz):
(?::(\d\d))? # optional seconds
)? # optional clock
\s*
- ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
+ (?:
+ ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+) # timezone
+ \s*
+ )?
+ (?:
+ \(\w+\) # ASCII representation of timezone in parens.
\s*
- (?:\(\w+\))? # ASCII representation of timezone in parens.
- \s*$""", re.X)
+ )?$""", re.X | re.ASCII)
def http2time(text):
"""Returns time in seconds since epoch of time represented by a string.
@@ -294,9 +302,11 @@ def http2time(text):
(?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional)
)? # optional clock
\s*
- ([-+]?\d\d?:?(:?\d\d)?
- |Z|z)? # timezone (Z is "zero meridian", i.e. GMT)
- \s*$""", re.X)
+ (?:
+ ([-+]?\d\d?:?(:?\d\d)?
+ |Z|z) # timezone (Z is "zero meridian", i.e. GMT)
+ \s*
+      )?$""", re.X | re.ASCII)
def iso2time(text):
"""
As for http2time, but parses the ISO 8601 formats:
@@ -384,7 +394,7 @@ def split_header_words(header_values):
[[('Basic', None), ('realm', '"foobar"')]]
"""
- assert not isinstance(header_values, basestring)
+ assert not isinstance(header_values, str)
result = []
for text in header_values:
orig_text = text
@@ -509,7 +519,7 @@ def parse_ns_headers(ns_headers):
return result
-IPV4_RE = re.compile(r"\.\d+$")
+IPV4_RE = re.compile(r"\.\d+$", re.ASCII)
def is_HDN(text):
"""Return True if text is a host domain name."""
# XXX
@@ -594,7 +604,7 @@ def user_domain_match(A, B):
return True
return False
-cut_port_re = re.compile(r":\d+$")
+cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
"""Return request-host, as defined by RFC 2965.
@@ -603,7 +613,7 @@ def request_host(request):
"""
url = request.get_full_url()
- host = urllib.parse.urlparse(url)[1]
+ host = urlparse(url)[1]
if host == "":
host = request.get_header("Host", "")
@@ -625,7 +635,7 @@ def eff_request_host(request):
def request_path(request):
"""Path component of request-URI, as defined by RFC 2965."""
url = request.get_full_url()
- parts = urllib.parse.urlsplit(url)
+ parts = urlsplit(url)
path = escape_path(parts.path)
if not path.startswith("/"):
# fix bad RFC 2396 absoluteURI
@@ -633,7 +643,7 @@ def request_path(request):
return path
def request_port(request):
- host = request.get_host()
+ host = request.host
i = host.find(':')
if i >= 0:
port = host[i+1:]
@@ -662,9 +672,7 @@ def escape_path(path):
# And here, kind of: draft-fielding-uri-rfc2396bis-03
# (And in draft IRI specification: draft-duerst-iri-05)
# (And here, for new URI schemes: RFC 2718)
- if isinstance(path, unicode):
- path = path.encode("utf-8")
- path = urllib.quote(path, HTTP_PATH_SAFE)
+ path = quote(path, HTTP_PATH_SAFE)
path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
return path
@@ -803,6 +811,7 @@ def __str__(self):
namevalue = self.name
         return "<Cookie %s for %s>" % (namevalue, limit)
+ @as_native_str()
def __repr__(self):
args = []
for name in ("version", "name", "value",
@@ -812,7 +821,12 @@ def __repr__(self):
"secure", "expires", "discard", "comment", "comment_url",
):
attr = getattr(self, name)
- args.append("%s=%s" % (name, repr(attr)))
+ ### Python-Future:
+ # Avoid u'...' prefixes for unicode strings:
+ if isinstance(attr, str):
+ attr = str(attr)
+ ###
+ args.append(str("%s=%s") % (name, repr(attr)))
args.append("rest=%s" % repr(self._rest))
args.append("rfc2109=%s" % repr(self.rfc2109))
return "Cookie(%s)" % ", ".join(args)
@@ -959,7 +973,7 @@ def set_ok_version(self, cookie, request):
return True
def set_ok_verifiability(self, cookie, request):
- if request.is_unverifiable() and is_third_party(request):
+ if request.unverifiable and is_third_party(request):
if cookie.version > 0 and self.strict_rfc2965_unverifiable:
_debug(" third-party RFC 2965 cookie during "
"unverifiable transaction")
@@ -1098,7 +1112,7 @@ def return_ok_version(self, cookie, request):
return True
def return_ok_verifiability(self, cookie, request):
- if request.is_unverifiable() and is_third_party(request):
+ if request.unverifiable and is_third_party(request):
if cookie.version > 0 and self.strict_rfc2965_unverifiable:
_debug(" third-party RFC 2965 cookie during unverifiable "
"transaction")
@@ -1110,7 +1124,7 @@ def return_ok_verifiability(self, cookie, request):
return True
def return_ok_secure(self, cookie, request):
- if cookie.secure and request.get_type() != "https":
+ if cookie.secure and request.type != "https":
_debug(" secure cookie with non-secure request")
return False
return True
@@ -1217,8 +1231,7 @@ class CookieJar(object):
"""Collection of HTTP cookies.
You may not need to know about this class: try
- urllib2.build_opener(HTTPCookieProcessor).open(url).
-
+ urllib.request.build_opener(HTTPCookieProcessor).open(url).
"""
non_word_re = re.compile(r"\W")
@@ -1227,7 +1240,7 @@ class CookieJar(object):
domain_re = re.compile(r"[^.]*")
dots_re = re.compile(r"^\.+")
- magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)")
+ magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)", re.ASCII)
def __init__(self, policy=None):
if policy is None:
@@ -1325,7 +1338,7 @@ def _cookie_attrs(self, cookies):
return attrs
def add_cookie_header(self, request):
- """Add correct Cookie: header to request (urllib2.Request object).
+ """Add correct Cookie: header to request (urllib.request.Request object).
The Cookie2 header is also added unless policy.hide_cookie2 is true.
@@ -1571,8 +1584,8 @@ def make_cookies(self, response, request):
"""Return sequence of Cookie objects extracted from response object."""
# get cookie-attributes for RFC 2965 and Netscape protocols
headers = response.info()
- rfc2965_hdrs = headers.getheaders("Set-Cookie2") # FIXME?
- ns_hdrs = headers.getheaders("Set-Cookie") # FIXME?
+ rfc2965_hdrs = headers.get_all("Set-Cookie2", [])
+ ns_hdrs = headers.get_all("Set-Cookie", [])
rfc2965 = self._policy.rfc2965
netscape = self._policy.netscape
@@ -1731,6 +1744,7 @@ def __len__(self):
for cookie in self: i = i + 1
return i
+ @as_native_str()
def __repr__(self):
r = []
for cookie in self: r.append(repr(cookie))
@@ -1805,5 +1819,298 @@ def revert(self, filename=None,
finally:
self._cookies_lock.release()
-from _LWPCookieJar import LWPCookieJar, lwp_cookie_str
-from _MozillaCookieJar import MozillaCookieJar
+
+def lwp_cookie_str(cookie):
+ """Return string representation of Cookie in an the LWP cookie file format.
+
+    """Return string representation of Cookie in the LWP cookie file format.
+
+ """
+ h = [(cookie.name, cookie.value),
+ ("path", cookie.path),
+ ("domain", cookie.domain)]
+ if cookie.port is not None: h.append(("port", cookie.port))
+ if cookie.path_specified: h.append(("path_spec", None))
+ if cookie.port_specified: h.append(("port_spec", None))
+ if cookie.domain_initial_dot: h.append(("domain_dot", None))
+ if cookie.secure: h.append(("secure", None))
+ if cookie.expires: h.append(("expires",
+ time2isoz(float(cookie.expires))))
+ if cookie.discard: h.append(("discard", None))
+ if cookie.comment: h.append(("comment", cookie.comment))
+ if cookie.comment_url: h.append(("commenturl", cookie.comment_url))
+
+ keys = sorted(cookie._rest.keys())
+ for k in keys:
+ h.append((k, str(cookie._rest[k])))
+
+ h.append(("version", str(cookie.version)))
+
+ return join_header_words([h])
+
+class LWPCookieJar(FileCookieJar):
+ """
+ The LWPCookieJar saves a sequence of "Set-Cookie3" lines.
+ "Set-Cookie3" is the format used by the libwww-perl library, not known
+ to be compatible with any browser, but which is easy to read and
+ doesn't lose information about RFC 2965 cookies.
+
+ Additional methods
+
+    as_lwp_str(ignore_discard=True, ignore_expires=True)
+
+ """
+
+ def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
+ """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers.
+
+ ignore_discard and ignore_expires: see docstring for FileCookieJar.save
+
+ """
+ now = time.time()
+ r = []
+ for cookie in self:
+ if not ignore_discard and cookie.discard:
+ continue
+ if not ignore_expires and cookie.is_expired(now):
+ continue
+ r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
+ return "\n".join(r+[""])
+
+ def save(self, filename=None, ignore_discard=False, ignore_expires=False):
+ if filename is None:
+ if self.filename is not None: filename = self.filename
+ else: raise ValueError(MISSING_FILENAME_TEXT)
+
+ f = open(filename, "w")
+ try:
+ # There really isn't an LWP Cookies 2.0 format, but this indicates
+ # that there is extra information in here (domain_dot and
+ # port_spec) while still being compatible with libwww-perl, I hope.
+ f.write("#LWP-Cookies-2.0\n")
+ f.write(self.as_lwp_str(ignore_discard, ignore_expires))
+ finally:
+ f.close()
+
+ def _really_load(self, f, filename, ignore_discard, ignore_expires):
+ magic = f.readline()
+ if not self.magic_re.search(magic):
+ msg = ("%r does not look like a Set-Cookie3 (LWP) format "
+ "file" % filename)
+ raise LoadError(msg)
+
+ now = time.time()
+
+ header = "Set-Cookie3:"
+ boolean_attrs = ("port_spec", "path_spec", "domain_dot",
+ "secure", "discard")
+ value_attrs = ("version",
+ "port", "path", "domain",
+ "expires",
+ "comment", "commenturl")
+
+ try:
+ while 1:
+ line = f.readline()
+ if line == "": break
+ if not line.startswith(header):
+ continue
+ line = line[len(header):].strip()
+
+ for data in split_header_words([line]):
+ name, value = data[0]
+ standard = {}
+ rest = {}
+ for k in boolean_attrs:
+ standard[k] = False
+ for k, v in data[1:]:
+ if k is not None:
+ lc = k.lower()
+ else:
+ lc = None
+ # don't lose case distinction for unknown fields
+ if (lc in value_attrs) or (lc in boolean_attrs):
+ k = lc
+ if k in boolean_attrs:
+ if v is None: v = True
+ standard[k] = v
+ elif k in value_attrs:
+ standard[k] = v
+ else:
+ rest[k] = v
+
+ h = standard.get
+ expires = h("expires")
+ discard = h("discard")
+ if expires is not None:
+ expires = iso2time(expires)
+ if expires is None:
+ discard = True
+ domain = h("domain")
+ domain_specified = domain.startswith(".")
+ c = Cookie(h("version"), name, value,
+ h("port"), h("port_spec"),
+ domain, domain_specified, h("domain_dot"),
+ h("path"), h("path_spec"),
+ h("secure"),
+ expires,
+ discard,
+ h("comment"),
+ h("commenturl"),
+ rest)
+ if not ignore_discard and c.discard:
+ continue
+ if not ignore_expires and c.is_expired(now):
+ continue
+ self.set_cookie(c)
+
+ except IOError:
+ raise
+ except Exception:
+ _warn_unhandled_exception()
+ raise LoadError("invalid Set-Cookie3 format file %r: %r" %
+ (filename, line))
+
+
+class MozillaCookieJar(FileCookieJar):
+ """
+
+ WARNING: you may want to backup your browser's cookies file if you use
+ this class to save cookies. I *think* it works, but there have been
+ bugs in the past!
+
+ This class differs from CookieJar only in the format it uses to save and
+ load cookies to and from a file. This class uses the Mozilla/Netscape
+ `cookies.txt' format. lynx uses this file format, too.
+
+ Don't expect cookies saved while the browser is running to be noticed by
+ the browser (in fact, Mozilla on unix will overwrite your saved cookies if
+ you change them on disk while it's running; on Windows, you probably can't
+ save at all while the browser is running).
+
+ Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
+ Netscape cookies on saving.
+
+ In particular, the cookie version and port number information is lost,
+ together with information about whether or not Path, Port and Discard were
+ specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
+ domain as set in the HTTP header started with a dot (yes, I'm aware some
+ domains in Netscape files start with a dot and some don't -- trust me, you
+ really don't want to know any more about this).
+
+ Note that though Mozilla and Netscape use the same format, they use
+ slightly different headers. The class saves cookies using the Netscape
+ header by default (Mozilla can cope with that).
+
+ """
+ magic_re = re.compile("#( Netscape)? HTTP Cookie File")
+ header = """\
+# Netscape HTTP Cookie File
+# http://www.netscape.com/newsref/std/cookie_spec.html
+# This is a generated file! Do not edit.
+
+"""
+
+ def _really_load(self, f, filename, ignore_discard, ignore_expires):
+ now = time.time()
+
+ magic = f.readline()
+ if not self.magic_re.search(magic):
+ f.close()
+ raise LoadError(
+ "%r does not look like a Netscape format cookies file" %
+ filename)
+
+ try:
+ while 1:
+ line = f.readline()
+ if line == "": break
+
+ # last field may be absent, so keep any trailing tab
+ if line.endswith("\n"): line = line[:-1]
+
+ # skip comments and blank lines XXX what is $ for?
+ if (line.strip().startswith(("#", "$")) or
+ line.strip() == ""):
+ continue
+
+ domain, domain_specified, path, secure, expires, name, value = \
+ line.split("\t")
+ secure = (secure == "TRUE")
+ domain_specified = (domain_specified == "TRUE")
+ if name == "":
+ # cookies.txt regards 'Set-Cookie: foo' as a cookie
+ # with no name, whereas http.cookiejar regards it as a
+ # cookie with no value.
+ name = value
+ value = None
+
+ initial_dot = domain.startswith(".")
+ assert domain_specified == initial_dot
+
+ discard = False
+ if expires == "":
+ expires = None
+ discard = True
+
+ # assume path_specified is false
+ c = Cookie(0, name, value,
+ None, False,
+ domain, domain_specified, initial_dot,
+ path, False,
+ secure,
+ expires,
+ discard,
+ None,
+ None,
+ {})
+ if not ignore_discard and c.discard:
+ continue
+ if not ignore_expires and c.is_expired(now):
+ continue
+ self.set_cookie(c)
+
+ except IOError:
+ raise
+ except Exception:
+ _warn_unhandled_exception()
+ raise LoadError("invalid Netscape format cookies file %r: %r" %
+ (filename, line))
+
+ def save(self, filename=None, ignore_discard=False, ignore_expires=False):
+ if filename is None:
+ if self.filename is not None: filename = self.filename
+ else: raise ValueError(MISSING_FILENAME_TEXT)
+
+ f = open(filename, "w")
+ try:
+ f.write(self.header)
+ now = time.time()
+ for cookie in self:
+ if not ignore_discard and cookie.discard:
+ continue
+ if not ignore_expires and cookie.is_expired(now):
+ continue
+ if cookie.secure: secure = "TRUE"
+ else: secure = "FALSE"
+ if cookie.domain.startswith("."): initial_dot = "TRUE"
+ else: initial_dot = "FALSE"
+ if cookie.expires is not None:
+ expires = str(cookie.expires)
+ else:
+ expires = ""
+ if cookie.value is None:
+ # cookies.txt regards 'Set-Cookie: foo' as a cookie
+ # with no name, whereas http.cookiejar regards it as a
+ # cookie with no value.
+ name = ""
+ value = cookie.name
+ else:
+ name = cookie.name
+ value = cookie.value
+ f.write(
+ "\t".join([cookie.domain, initial_dot, cookie.path,
+ secure, expires, name, value])+
+ "\n")
+ finally:
+ f.close()
diff --git a/src/future/backports/http/cookies.py b/src/future/backports/http/cookies.py
new file mode 100644
index 00000000..8bb61e22
--- /dev/null
+++ b/src/future/backports/http/cookies.py
@@ -0,0 +1,598 @@
+####
+# Copyright 2000 by Timothy O'Malley
+#
+# All Rights Reserved
+#
+# Permission to use, copy, modify, and distribute this software
+# and its documentation for any purpose and without fee is hereby
+# granted, provided that the above copyright notice appear in all
+# copies and that both that copyright notice and this permission
+# notice appear in supporting documentation, and that the name of
+# Timothy O'Malley not be used in advertising or publicity
+# pertaining to distribution of the software without specific, written
+# prior permission.
+#
+# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
+# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR
+# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+# PERFORMANCE OF THIS SOFTWARE.
+#
+####
+#
+# Id: Cookie.py,v 2.29 2000/08/23 05:28:49 timo Exp
+# by Timothy O'Malley
+#
+# Cookie.py is a Python module for the handling of HTTP
+# cookies as a Python dictionary. See RFC 2109 for more
+# information on cookies.
+#
+# The original idea to treat Cookies as a dictionary came from
+# Dave Mitchell (davem@magnet.com) in 1995, when he released the
+# first version of nscookie.py.
+#
+####
+
+r"""
+http.cookies module ported to python-future from Py3.3
+
+Here's a sample session to show how to use this module.
+At the moment, this is the only documentation.
+
+The Basics
+----------
+
+Importing is easy...
+
+ >>> from http import cookies
+
+Most of the time you start by creating a cookie.
+
+ >>> C = cookies.SimpleCookie()
+
+Once you've created your Cookie, you can add values just as if it were
+a dictionary.
+
+ >>> C = cookies.SimpleCookie()
+ >>> C["fig"] = "newton"
+ >>> C["sugar"] = "wafer"
+ >>> C.output()
+ 'Set-Cookie: fig=newton\r\nSet-Cookie: sugar=wafer'
+
+Notice that the printable representation of a Cookie is the
+appropriate format for a Set-Cookie: header. This is the
+default behavior. You can change the header and printed
+attributes by using the .output() function
+
+ >>> C = cookies.SimpleCookie()
+ >>> C["rocky"] = "road"
+ >>> C["rocky"]["path"] = "/cookie"
+ >>> print(C.output(header="Cookie:"))
+ Cookie: rocky=road; Path=/cookie
+ >>> print(C.output(attrs=[], header="Cookie:"))
+ Cookie: rocky=road
+
+The load() method of a Cookie extracts cookies from a string. In a
+CGI script, you would use this method to extract the cookies from the
+HTTP_COOKIE environment variable.
+
+ >>> C = cookies.SimpleCookie()
+ >>> C.load("chips=ahoy; vienna=finger")
+ >>> C.output()
+ 'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger'
+
+The load() method is darn-tootin smart about identifying cookies
+within a string. Escaped quotation marks, nested semicolons, and other
+such trickeries do not confuse it.
+
+ >>> C = cookies.SimpleCookie()
+ >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";')
+ >>> print(C)
+ Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;"
+
+Each element of the Cookie also supports all of the RFC 2109
+Cookie attributes. Here's an example which sets the Path
+attribute.
+
+ >>> C = cookies.SimpleCookie()
+ >>> C["oreo"] = "doublestuff"
+ >>> C["oreo"]["path"] = "/"
+ >>> print(C)
+ Set-Cookie: oreo=doublestuff; Path=/
+
+Each dictionary element has a 'value' attribute, which gives you
+back the value associated with the key.
+
+ >>> C = cookies.SimpleCookie()
+ >>> C["twix"] = "none for you"
+ >>> C["twix"].value
+ 'none for you'
+
+The SimpleCookie expects that all values should be standard strings.
+Just to be sure, SimpleCookie invokes the str() builtin to convert
+the value to a string, when the values are set dictionary-style.
+
+ >>> C = cookies.SimpleCookie()
+ >>> C["number"] = 7
+ >>> C["string"] = "seven"
+ >>> C["number"].value
+ '7'
+ >>> C["string"].value
+ 'seven'
+ >>> C.output()
+ 'Set-Cookie: number=7\r\nSet-Cookie: string=seven'
+
+Finis.
+"""
+from __future__ import unicode_literals
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import chr, dict, int, str
+from future.utils import PY2, as_native_str
+
+#
+# Import our required modules
+#
+import re
+if PY2:
+ re.ASCII = 0 # for py2 compatibility
+import string
+
+__all__ = ["CookieError", "BaseCookie", "SimpleCookie"]
+
+_nulljoin = ''.join
+_semispacejoin = '; '.join
+_spacejoin = ' '.join
+
+#
+# Define an exception visible to External modules
+#
+class CookieError(Exception):
+ pass
+
+
+# These quoting routines conform to the RFC2109 specification, which in
+# turn references the character definitions from RFC2068. They provide
+# a two-way quoting algorithm. Any non-text character is translated
+# into a 4 character sequence: a forward-slash followed by the
+# three-digit octal equivalent of the character. Any '\' or '"' is
+# quoted with a preceding '\' slash.
+#
+# These are taken from RFC2068 and RFC2109.
+# _LegalChars is the list of chars which don't require "'s
+# _Translator hash-table for fast quoting
+#
+_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~:"
+_Translator = {
+ '\000' : '\\000', '\001' : '\\001', '\002' : '\\002',
+ '\003' : '\\003', '\004' : '\\004', '\005' : '\\005',
+ '\006' : '\\006', '\007' : '\\007', '\010' : '\\010',
+ '\011' : '\\011', '\012' : '\\012', '\013' : '\\013',
+ '\014' : '\\014', '\015' : '\\015', '\016' : '\\016',
+ '\017' : '\\017', '\020' : '\\020', '\021' : '\\021',
+ '\022' : '\\022', '\023' : '\\023', '\024' : '\\024',
+ '\025' : '\\025', '\026' : '\\026', '\027' : '\\027',
+ '\030' : '\\030', '\031' : '\\031', '\032' : '\\032',
+ '\033' : '\\033', '\034' : '\\034', '\035' : '\\035',
+ '\036' : '\\036', '\037' : '\\037',
+
+ # Because of the way browsers really handle cookies (as opposed
+ # to what the RFC says) we also encode , and ;
+
+ ',' : '\\054', ';' : '\\073',
+
+ '"' : '\\"', '\\' : '\\\\',
+
+ '\177' : '\\177', '\200' : '\\200', '\201' : '\\201',
+ '\202' : '\\202', '\203' : '\\203', '\204' : '\\204',
+ '\205' : '\\205', '\206' : '\\206', '\207' : '\\207',
+ '\210' : '\\210', '\211' : '\\211', '\212' : '\\212',
+ '\213' : '\\213', '\214' : '\\214', '\215' : '\\215',
+ '\216' : '\\216', '\217' : '\\217', '\220' : '\\220',
+ '\221' : '\\221', '\222' : '\\222', '\223' : '\\223',
+ '\224' : '\\224', '\225' : '\\225', '\226' : '\\226',
+ '\227' : '\\227', '\230' : '\\230', '\231' : '\\231',
+ '\232' : '\\232', '\233' : '\\233', '\234' : '\\234',
+ '\235' : '\\235', '\236' : '\\236', '\237' : '\\237',
+ '\240' : '\\240', '\241' : '\\241', '\242' : '\\242',
+ '\243' : '\\243', '\244' : '\\244', '\245' : '\\245',
+ '\246' : '\\246', '\247' : '\\247', '\250' : '\\250',
+ '\251' : '\\251', '\252' : '\\252', '\253' : '\\253',
+ '\254' : '\\254', '\255' : '\\255', '\256' : '\\256',
+ '\257' : '\\257', '\260' : '\\260', '\261' : '\\261',
+ '\262' : '\\262', '\263' : '\\263', '\264' : '\\264',
+ '\265' : '\\265', '\266' : '\\266', '\267' : '\\267',
+ '\270' : '\\270', '\271' : '\\271', '\272' : '\\272',
+ '\273' : '\\273', '\274' : '\\274', '\275' : '\\275',
+ '\276' : '\\276', '\277' : '\\277', '\300' : '\\300',
+ '\301' : '\\301', '\302' : '\\302', '\303' : '\\303',
+ '\304' : '\\304', '\305' : '\\305', '\306' : '\\306',
+ '\307' : '\\307', '\310' : '\\310', '\311' : '\\311',
+ '\312' : '\\312', '\313' : '\\313', '\314' : '\\314',
+ '\315' : '\\315', '\316' : '\\316', '\317' : '\\317',
+ '\320' : '\\320', '\321' : '\\321', '\322' : '\\322',
+ '\323' : '\\323', '\324' : '\\324', '\325' : '\\325',
+ '\326' : '\\326', '\327' : '\\327', '\330' : '\\330',
+ '\331' : '\\331', '\332' : '\\332', '\333' : '\\333',
+ '\334' : '\\334', '\335' : '\\335', '\336' : '\\336',
+ '\337' : '\\337', '\340' : '\\340', '\341' : '\\341',
+ '\342' : '\\342', '\343' : '\\343', '\344' : '\\344',
+ '\345' : '\\345', '\346' : '\\346', '\347' : '\\347',
+ '\350' : '\\350', '\351' : '\\351', '\352' : '\\352',
+ '\353' : '\\353', '\354' : '\\354', '\355' : '\\355',
+ '\356' : '\\356', '\357' : '\\357', '\360' : '\\360',
+ '\361' : '\\361', '\362' : '\\362', '\363' : '\\363',
+ '\364' : '\\364', '\365' : '\\365', '\366' : '\\366',
+ '\367' : '\\367', '\370' : '\\370', '\371' : '\\371',
+ '\372' : '\\372', '\373' : '\\373', '\374' : '\\374',
+ '\375' : '\\375', '\376' : '\\376', '\377' : '\\377'
+ }
+
+def _quote(str, LegalChars=_LegalChars):
+ r"""Quote a string for use in a cookie header.
+
+ If the string does not need to be double-quoted, then just return the
+ string. Otherwise, surround the string in doublequotes and quote
+ (with a \) special characters.
+ """
+ if all(c in LegalChars for c in str):
+ return str
+ else:
+ return '"' + _nulljoin(_Translator.get(s, s) for s in str) + '"'
+
+
+# \NNN octal escape (e.g. \012) and \<char> quoted-character sequences,
+# as produced by _quote() above.
+_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
+_QuotePatt = re.compile(r"[\\].")
+
+def _unquote(mystr):
+    """Reverse _quote(): strip surrounding doublequotes and decode
+    backslash escapes (\\012-style octal and \\" style quoting).
+    Strings that are not doublequoted are returned unchanged.
+    """
+    # If there aren't any doublequotes,
+    # then there can't be any special characters. See RFC 2109.
+    if len(mystr) < 2:
+        return mystr
+    if mystr[0] != '"' or mystr[-1] != '"':
+        return mystr
+
+    # We have to assume that we must decode this string.
+    # Down to work.
+
+    # Remove the "s
+    mystr = mystr[1:-1]
+
+    # Check for special sequences. Examples:
+    # \012 --> \n
+    # \" --> "
+    #
+    i = 0
+    n = len(mystr)
+    res = []
+    while 0 <= i < n:
+        # Find the next escape of either kind at or after position i.
+        o_match = _OctalPatt.search(mystr, i)
+        q_match = _QuotePatt.search(mystr, i)
+        if not o_match and not q_match:              # Neither matched
+            res.append(mystr[i:])
+            break
+        # else:
+        j = k = -1
+        if o_match:
+            j = o_match.start(0)
+        if q_match:
+            k = q_match.start(0)
+        # Whichever escape occurs first wins; copy the literal text before
+        # it, then the decoded character, then continue past the escape.
+        if q_match and (not o_match or k < j):     # QuotePatt matched
+            res.append(mystr[i:k])
+            res.append(mystr[k+1])
+            i = k + 2
+        else:                                      # OctalPatt matched
+            res.append(mystr[i:j])
+            res.append(chr(int(mystr[j+1:j+4], 8)))
+            i = j + 4
+    return _nulljoin(res)
+
+# The _getdate() routine is used to set the expiration time in the cookie's HTTP
+# header. By default, _getdate() returns the current time in the appropriate
+# "expires" format for a Set-Cookie header. The one optional argument is an
+# offset from now, in seconds. For example, an offset of -3600 means "one hour
+# ago". The offset may be a floating point number.
+#
+
+# RFC 1123 day/month abbreviations; _monthname is 1-indexed (hence the
+# leading None) to line up with struct_time's 1-based month field.
+_weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
+
+_monthname = [None,
+              'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+
+def _getdate(future=0, weekdayname=_weekdayname, monthname=_monthname):
+    """Return now + ``future`` seconds formatted for an "expires" attribute
+    (e.g. 'Mon, 01 Jan 2001 00:00:00 GMT').  Always uses GMT per RFC 1123.
+    """
+    from time import gmtime, time
+    now = time()
+    year, month, day, hh, mm, ss, wd, y, z = gmtime(now + future)
+    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % \
+           (weekdayname[wd], day, monthname[month], year, hh, mm, ss)
+
+
+class Morsel(dict):
+    """A class to hold ONE (key, value) pair.
+
+    In a cookie, each such pair may have several attributes, so this class is
+    used to keep the attributes associated with the appropriate key,value pair.
+    This class also includes a coded_value attribute, which is used to hold
+    the network representation of the value. This is most useful when Python
+    objects are pickled for network transit.
+    """
+    # RFC 2109 lists these attributes as reserved:
+    #   path       comment       domain
+    #   max-age    secure        version
+    #
+    # For historical reasons, these attributes are also reserved:
+    #   expires
+    #
+    # This is an extension from Microsoft:
+    #   httponly
+    #
+    # This dictionary provides a mapping from the lowercase
+    # variant on the left to the appropriate traditional
+    # formatting on the right.
+    _reserved = {
+        "expires"  : "expires",
+        "path"     : "Path",
+        "comment"  : "Comment",
+        "domain"   : "Domain",
+        "max-age"  : "Max-Age",
+        "secure"   : "secure",
+        "httponly" : "httponly",
+        "version"  : "Version",
+        }
+
+    # Boolean attributes that are rendered without an "=value" part.
+    _flags = set(['secure', 'httponly'])
+
+    def __init__(self):
+        # Set defaults
+        self.key = self.value = self.coded_value = None
+
+        # Set default attributes
+        for key in self._reserved:
+            dict.__setitem__(self, key, "")
+
+    def __setitem__(self, K, V):
+        # Only the reserved cookie attributes may be set via item access;
+        # the key/value pair itself goes through set().
+        K = K.lower()
+        if not K in self._reserved:
+            raise CookieError("Invalid Attribute %s" % K)
+        dict.__setitem__(self, K, V)
+
+    def isReservedKey(self, K):
+        return K.lower() in self._reserved
+
+    def set(self, key, val, coded_val, LegalChars=_LegalChars):
+        # First we verify that the key isn't a reserved word
+        # Second we make sure it only contains legal characters
+        if key.lower() in self._reserved:
+            raise CookieError("Attempt to set a reserved key: %s" % key)
+        if any(c not in LegalChars for c in key):
+            raise CookieError("Illegal key value: %s" % key)
+
+        # It's a good key, so save it.
+        self.key = key
+        self.value = val
+        self.coded_value = coded_val
+
+    def output(self, attrs=None, header="Set-Cookie:"):
+        return "%s %s" % (header, self.OutputString(attrs))
+
+    __str__ = output
+
+    @as_native_str()
+    def __repr__(self):
+        if PY2 and isinstance(self.value, unicode):
+            val = str(self.value)   # make it a newstr to remove the u prefix
+        else:
+            val = self.value
+        return '<%s: %s=%s>' % (self.__class__.__name__,
+                                str(self.key), repr(val))
+
+    def js_output(self, attrs=None):
+        # Print javascript
+        # NOTE: the <script> template below matches the upstream CPython
+        # http.cookies implementation; it was lost from this patch when
+        # angle-bracketed text was stripped during extraction.
+        return """
+        <script type="text/javascript">
+        <!-- begin hidden
+        document.cookie = \"%s\";
+        // end hidden -->
+        </script>
+        """ % (self.OutputString(attrs).replace('"', r'\"'))
+
+    def OutputString(self, attrs=None):
+        # Build up our result
+        #
+        result = []
+        append = result.append
+
+        # First, the key=value pair
+        append("%s=%s" % (self.key, self.coded_value))
+
+        # Now add any defined attributes
+        if attrs is None:
+            attrs = self._reserved
+        items = sorted(self.items())
+        for key, value in items:
+            if value == "":
+                continue
+            if key not in attrs:
+                continue
+            # An integer "expires" is treated as a seconds-from-now offset
+            # and rendered through _getdate().
+            if key == "expires" and isinstance(value, int):
+                append("%s=%s" % (self._reserved[key], _getdate(value)))
+            elif key == "max-age" and isinstance(value, int):
+                append("%s=%d" % (self._reserved[key], value))
+            elif key == "secure":
+                append(str(self._reserved[key]))
+            elif key == "httponly":
+                append(str(self._reserved[key]))
+            else:
+                append("%s=%s" % (self._reserved[key], value))
+
+        # Return the result
+        return _semispacejoin(result)
+
+
+#
+# Pattern for finding cookie
+#
+# This used to be strict parsing based on the RFC2109 and RFC2068
+# specifications. I have since discovered that MSIE 3.0x doesn't
+# follow the character rules outlined in those specs. As a
+# result, the parsing rules here are less strict.
+#
+
+_LegalCharsPatt = r"[\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=]"
+# NOTE: the named-group syntax "(?P<key>" / "(?P<val>" below was lost from
+# this patch when angle-bracketed text was stripped; it is restored here to
+# match upstream CPython http.cookies (BaseCookie.__parse_string relies on
+# match.group("key") and match.group("val")).
+_CookiePattern = re.compile(r"""
+    (?x)                           # This is a verbose pattern
+    (?P<key>                       # Start of group 'key'
+    """ + _LegalCharsPatt + r"""+?   # Any word of at least one letter
+    )                              # End of group 'key'
+    (                              # Optional group: there may not be a value.
+    \s*=\s*                          # Equal Sign
+    (?P<val>                         # Start of group 'val'
+    "(?:[^\\"]|\\.)*"                  # Any doublequoted string
+    |                                  # or
+    \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT  # Special case for "expires" attr
+    |                                  # or
+    """ + _LegalCharsPatt + r"""*      # Any word or empty string
+    )                                # End of group 'val'
+    )?                             # End of optional value group
+    \s*                            # Any number of spaces.
+    (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
+    """, re.ASCII)                 # May be removed if safe.
+
+
+# At long last, here is the cookie class. Using this class is almost just like
+# using a dictionary. See this module's docstring for example usage.
+#
+class BaseCookie(dict):
+    """A container class for a set of Morsels."""
+
+    def value_decode(self, val):
+        """real_value, coded_value = value_decode(STRING)
+        Called prior to setting a cookie's value from the network
+        representation. The VALUE is the value read from HTTP
+        header.
+        Override this function to modify the behavior of cookies.
+        """
+        # Base class performs no decoding: real and coded values coincide.
+        return val, val
+
+    def value_encode(self, val):
+        """real_value, coded_value = value_encode(VALUE)
+        Called prior to setting a cookie's value from the dictionary
+        representation. The VALUE is the value being assigned.
+        Override this function to modify the behavior of cookies.
+        """
+        strval = str(val)
+        return strval, strval
+
+    def __init__(self, input=None):
+        if input:
+            self.load(input)
+
+    def __set(self, key, real_value, coded_value):
+        """Private method for setting a cookie's value"""
+        # Reuse an existing Morsel for this key if present, else create one.
+        M = self.get(key, Morsel())
+        M.set(key, real_value, coded_value)
+        dict.__setitem__(self, key, M)
+
+    def __setitem__(self, key, value):
+        """Dictionary style assignment."""
+        rval, cval = self.value_encode(value)
+        self.__set(key, rval, cval)
+
+    def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"):
+        """Return a string suitable for HTTP."""
+        # One "Set-Cookie:" line per Morsel, joined by CRLF by default.
+        result = []
+        items = sorted(self.items())
+        for key, value in items:
+            result.append(value.output(attrs, header))
+        return sep.join(result)
+
+    __str__ = output
+
+    @as_native_str()
+    def __repr__(self):
+        l = []
+        items = sorted(self.items())
+        for key, value in items:
+            if PY2 and isinstance(value.value, unicode):
+                val = str(value.value)   # make it a newstr to remove the u prefix
+            else:
+                val = value.value
+            l.append('%s=%s' % (str(key), repr(val)))
+        return '<%s: %s>' % (self.__class__.__name__, _spacejoin(l))
+
+    def js_output(self, attrs=None):
+        """Return a string suitable for JavaScript."""
+        result = []
+        items = sorted(self.items())
+        for key, value in items:
+            result.append(value.js_output(attrs))
+        return _nulljoin(result)
+
+    def load(self, rawdata):
+        """Load cookies from a string (presumably HTTP_COOKIE) or
+        from a dictionary. Loading cookies from a dictionary 'd'
+        is equivalent to calling:
+            map(Cookie.__setitem__, d.keys(), d.values())
+        """
+        if isinstance(rawdata, str):
+            self.__parse_string(rawdata)
+        else:
+            # self.update() wouldn't call our custom __setitem__
+            for key, value in rawdata.items():
+                self[key] = value
+        return
+
+    def __parse_string(self, mystr, patt=_CookiePattern):
+        # Scan the header string left to right, building Morsels as we go.
+        i = 0            # Our starting point
+        n = len(mystr)   # Length of string
+        M = None         # current morsel
+
+        while 0 <= i < n:
+            # Start looking for a cookie
+            match = patt.search(mystr, i)
+            if not match:
+                # No more cookies
+                break
+
+            key, value = match.group("key"), match.group("val")
+
+            # Resume scanning after this match on the next iteration.
+            i = match.end(0)
+
+            # Parse the key, value in case it's metainfo
+            if key[0] == "$":
+                # We ignore attributes which pertain to the cookie
+                # mechanism as a whole. See RFC 2109.
+                # (Does anyone care?)
+                if M:
+                    M[key[1:]] = value
+            elif key.lower() in Morsel._reserved:
+                # A reserved attribute of the most recent Morsel (if any).
+                # Flags like "secure"/"httponly" may appear with no value.
+                if M:
+                    if value is None:
+                        if key.lower() in Morsel._flags:
+                            M[key] = True
+                    else:
+                        M[key] = _unquote(value)
+            elif value is not None:
+                # A new key=value pair: start a fresh Morsel for it.
+                rval, cval = self.value_decode(value)
+                self.__set(key, rval, cval)
+                M = self[key]
+
+
+class SimpleCookie(BaseCookie):
+    """
+    SimpleCookie supports strings as cookie values. When setting
+    the value using the dictionary assignment notation, SimpleCookie
+    calls the builtin str() to convert the value to a string. Values
+    received from HTTP are kept as strings.
+    """
+    def value_decode(self, val):
+        # Network form may be backslash-quoted; real value is the
+        # unquoted string, coded value is the raw header text.
+        return _unquote(val), val
+
+    def value_encode(self, val):
+        # Coerce to str, then quote for safe transmission in a header.
+        strval = str(val)
+        return strval, _quote(strval)
diff --git a/future/standard_library/http/server.py b/src/future/backports/http/server.py
similarity index 96%
rename from future/standard_library/http/server.py
rename to src/future/backports/http/server.py
index b9b4d348..b1c11e0c 100644
--- a/future/standard_library/http/server.py
+++ b/src/future/backports/http/server.py
@@ -35,7 +35,7 @@
from __future__ import (absolute_import, division,
print_function, unicode_literals)
-from future import standard_library
+from future import utils
from future.builtins import *
@@ -91,10 +91,11 @@
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
-import html
-import email.message
-import email.parser
-import http.client
+from future.backports import html
+from future.backports.http import client as http_client
+from future.backports.urllib import parse as urllib_parse
+from future.backports import socketserver
+
import io
import mimetypes
import os
@@ -102,19 +103,11 @@
import select
import shutil
import socket # For gethostbyaddr()
-import socketserver
import sys
import time
import copy
import argparse
-# Not backported yet:
-# import urllib.parse
-# The old Py2 one instead:
-import urllib
-# Also from 2.7:
-import cgi
-
# Default error message template
DEFAULT_ERROR_MESSAGE = """\
@@ -332,9 +325,9 @@ def parse_request(self):
# Examine the headers and look for a Connection directive.
try:
- self.headers = http.client.parse_headers(self.rfile,
+ self.headers = http_client.parse_headers(self.rfile,
_class=self.MessageClass)
- except http.client.LineTooLong:
+ except http_client.LineTooLong:
self.send_error(400, "Line too long")
return False
@@ -584,7 +577,7 @@ def address_string(self):
protocol_version = "HTTP/1.0"
# MessageClass used to parse headers
- MessageClass = http.client.HTTPMessage
+ MessageClass = http_client.HTTPMessage
# Table mapping response codes to messages; entries have the
# form {code: (shortmessage, longmessage)}.
@@ -748,10 +741,7 @@ def list_directory(self, path):
return None
list.sort(key=lambda a: a.lower())
r = []
- # Urllib.parse not ported yet:
- # displaypath = html.escape(urllib.parse.unquote(self.path))
- # Use this code from the Py2.7 httpservers.py module instead:
- displaypath = cgi.escape(urllib.unquote(self.path))
+ displaypath = html.escape(urllib_parse.unquote(self.path))
enc = sys.getfilesystemencoding()
title = 'Directory listing for %s' % displaypath
r.append('%s'
- # % (urllib.parse.quote(linkname), html.escape(displayname)))
- # Use this instead:
r.append('%s'
- % (urllib.quote(linkname), cgi.escape(displayname)))
+ % (urllib_parse.quote(linkname), html.escape(displayname)))
+ # # Use this instead:
+ # r.append('%s'
+ # % (urllib.quote(linkname), cgi.escape(displayname)))
r.append('\n
\n