Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion Doc/library/base64.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ POST request.
Added the *wrapcol* parameter.


.. function:: b64decode(s, altchars=None, validate=False)
.. function:: b64decode(s, altchars=None, validate=False, *, ignorechars=None)

Decode the Base64 encoded :term:`bytes-like object` or ASCII string
*s* and return the decoded :class:`bytes`.
Expand All @@ -90,10 +90,19 @@ POST request.
these non-alphabet characters in the input result in a
:exc:`binascii.Error`.

Optional *ignorechars* must be a :term:`bytes-like object` specifying
characters to ignore during decoding. When provided, only characters in
this set will be silently ignored; other non-base64 characters will cause
a :exc:`binascii.Error`. When ``None`` (the default), the behavior is
controlled by the *validate* parameter.

For more information about the strict base64 check, see :func:`binascii.a2b_base64`.

May assert or raise a :exc:`ValueError` if the length of *altchars* is not 2.

.. versionchanged:: next
Added the *ignorechars* parameter.

.. function:: standard_b64encode(s)

Encode :term:`bytes-like object` *s* using the standard Base64 alphabet
Expand Down
11 changes: 10 additions & 1 deletion Doc/library/binascii.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ The :mod:`binascii` module defines the following functions:
Added the *backtick* parameter.


.. function:: a2b_base64(string, /, *, strict_mode=False)
.. function:: a2b_base64(string, /, *, strict_mode=False, ignorechars=None)

Convert a block of base64 data back to binary and return the binary data. More
than one line may be passed at a time.
Expand All @@ -63,9 +63,18 @@ The :mod:`binascii` module defines the following functions:
* Contains no excess data after padding (including excess padding, newlines, etc.).
* Does not start with a padding.

Optional *ignorechars* must be a :term:`bytes-like object` specifying
characters to ignore during decoding. When provided, only characters in
this set will be silently ignored; other non-base64 characters will cause
a :exc:`binascii.Error`. When ``None`` (the default), all non-base64
characters are silently ignored (unless *strict_mode* is true).

.. versionchanged:: 3.11
Added the *strict_mode* parameter.

.. versionchanged:: next
Added the *ignorechars* parameter.


.. function:: b2a_base64(data, *, wrapcol=0, newline=True)

Expand Down
6 changes: 6 additions & 0 deletions Doc/whatsnew/3.15.rst
Original file line number Diff line number Diff line change
Expand Up @@ -444,13 +444,19 @@ base64
* Added the *wrapcol* parameter in :func:`~base64.b64encode`.
(Contributed by Serhiy Storchaka in :gh:`143214`.)

* Added the *ignorechars* parameter in :func:`~base64.b64decode`.
(Contributed by Muneeb Ullah in :gh:`144001`.)


binascii
--------

* Added the *wrapcol* parameter in :func:`~binascii.b2a_base64`.
(Contributed by Serhiy Storchaka in :gh:`143214`.)

* Added the *ignorechars* parameter in :func:`~binascii.a2b_base64`.
(Contributed by Muneeb Ullah in :gh:`144001`.)


calendar
--------
Expand Down
1 change: 1 addition & 0 deletions Include/internal/pycore_global_objects_fini_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Include/internal/pycore_global_strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(ident)
STRUCT_FOR_ID(identity_hint)
STRUCT_FOR_ID(ignore)
STRUCT_FOR_ID(ignorechars)
STRUCT_FOR_ID(imag)
STRUCT_FOR_ID(implieslink)
STRUCT_FOR_ID(importlib)
Expand Down
1 change: 1 addition & 0 deletions Include/internal/pycore_runtime_init_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Include/internal/pycore_unicodeobject_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 7 additions & 2 deletions Lib/base64.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def b64encode(s, altchars=None, *, wrapcol=0):
return encoded


def b64decode(s, altchars=None, validate=False):
def b64decode(s, altchars=None, validate=False, *, ignorechars=None):
"""Decode the Base64 encoded bytes-like object or ASCII string s.

Optional altchars must be a bytes-like object or ASCII string of length 2
Expand All @@ -79,13 +79,18 @@ def b64decode(s, altchars=None, validate=False):
For more information about the strict base64 check, see:

https://docs.python.org/3.11/library/binascii.html#binascii.a2b_base64

Optional ignorechars must be a bytes-like object specifying characters to
ignore during decoding. When provided, only characters in this set will be
silently ignored; other non-base64 characters will cause a binascii.Error.
When None (the default), the behavior is controlled by the validate parameter.
"""
s = _bytes_from_decode_data(s)
if altchars is not None:
altchars = _bytes_from_decode_data(altchars)
assert len(altchars) == 2, repr(altchars)
s = s.translate(bytes.maketrans(altchars, b'+/'))
return binascii.a2b_base64(s, strict_mode=validate)
return binascii.a2b_base64(s, strict_mode=validate, ignorechars=ignorechars)


def standard_b64encode(s):
Expand Down
41 changes: 41 additions & 0 deletions Lib/test/test_base64.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,47 @@ def test_b64decode_invalid_chars(self):
self.assertEqual(base64.urlsafe_b64decode(b'++//'), res)
self.assertEqual(base64.urlsafe_b64decode(b'--__'), res)

def test_b64decode_ignorechars(self):
# gh-144001: Test ignorechars parameter
eq = self.assertEqual

# Basic functionality: ignore whitespace characters
eq(base64.b64decode(b'YWJj\n', ignorechars=b'\n'), b'abc')
eq(base64.b64decode(b'YWJj\r\n', ignorechars=b'\r\n'), b'abc')
eq(base64.b64decode(b'YWJj \t\n', ignorechars=b' \t\n'), b'abc')

# Multiple whitespace characters in data
eq(base64.b64decode(b'YW Jj\nYW I=', ignorechars=b' \n'), b'abcab')

# ignorechars=b'' should reject all non-base64 characters
with self.assertRaises(binascii.Error):
base64.b64decode(b'YWJj\n', ignorechars=b'')
with self.assertRaises(binascii.Error):
base64.b64decode(b'YWJj ', ignorechars=b'')

# Characters not in ignorechars should raise error
with self.assertRaises(binascii.Error):
base64.b64decode(b'YWJj!', ignorechars=b'\n')
with self.assertRaises(binascii.Error):
base64.b64decode(b'YWJj@', ignorechars=b' \t\n')

# ignorechars with custom characters
eq(base64.b64decode(b'YW|Jj', ignorechars=b'|'), b'abc')
eq(base64.b64decode(b'YW#Jj', ignorechars=b'#'), b'abc')

# ignorechars=None (the default) keeps the non-validating behavior:
# characters outside the base64 alphabet (here '\n' and '!') are skipped
eq(base64.b64decode(b'YWJj\n', ignorechars=None), b'abc')
eq(base64.b64decode(b'YWJj!', ignorechars=None), b'abc')

# Test with altchars and ignorechars together
eq(base64.b64decode(b'YW-j\n', altchars=b'-_', ignorechars=b'\n'), b'ao\xa3')

# Test string input
eq(base64.b64decode('YWJj\n', ignorechars=b'\n'), b'abc')

# Test that ignorechars accepts various bytes-like objects
eq(base64.b64decode(b'YWJj\n', ignorechars=bytearray(b'\n')), b'abc')

def _altchars_strategy():
"""Generate 'altchars' for base64 encoding."""
reserved_chars = (string.digits + string.ascii_letters + "=").encode()
Expand Down
39 changes: 39 additions & 0 deletions Lib/test/test_binascii.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,45 @@ def assertExcessPadding(data, non_strict_mode_expected_result: bytes):
assertExcessPadding(b'abcd====', b'i\xb7\x1d')
assertExcessPadding(b'abcd=====', b'i\xb7\x1d')

def test_base64_ignorechars(self):
# gh-144001: Test ignorechars parameter for a2b_base64
a2b = binascii.a2b_base64
type2test = self.type2test

# Basic functionality: ignore specified characters
self.assertEqual(a2b(type2test(b'YWJj\n'), ignorechars=b'\n'), b'abc')
self.assertEqual(a2b(type2test(b'YWJj\r\n'), ignorechars=b'\r\n'), b'abc')
self.assertEqual(a2b(type2test(b'YWJj \t\n'), ignorechars=b' \t\n'), b'abc')

# Multiple ignored characters in data
self.assertEqual(a2b(type2test(b'YW Jj\nYW I='), ignorechars=b' \n'), b'abcab')

# ignorechars=b'' should reject all non-base64 characters
with self.assertRaisesRegex(binascii.Error, r'(?i)Only base64 data'):
a2b(type2test(b'YWJj\n'), ignorechars=b'')
with self.assertRaisesRegex(binascii.Error, r'(?i)Only base64 data'):
a2b(type2test(b'YWJj '), ignorechars=b'')

# Characters not in ignorechars should raise error
with self.assertRaisesRegex(binascii.Error, r'(?i)Only base64 data'):
a2b(type2test(b'YWJj!'), ignorechars=b'\n')
with self.assertRaisesRegex(binascii.Error, r'(?i)Only base64 data'):
a2b(type2test(b'YWJj@'), ignorechars=b' \t\n')

# ignorechars with custom characters
self.assertEqual(a2b(type2test(b'YW|Jj'), ignorechars=b'|'), b'abc')
self.assertEqual(a2b(type2test(b'YW#Jj'), ignorechars=b'#'), b'abc')

# ignorechars=None should use default behavior (ignore all non-base64)
self.assertEqual(a2b(type2test(b'YWJj\n'), ignorechars=None), b'abc')
self.assertEqual(a2b(type2test(b'YWJj!'), ignorechars=None), b'abc')

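# Explicit strict_mode=False combined with ignorechars: the ignorechars set
# still decides which non-base64 characters are skipped.
# NOTE(review): strict_mode=True together with ignorechars is not covered here.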
self.assertEqual(a2b(type2test(b'YWJj\n'), ignorechars=b'\n', strict_mode=False), b'abc')

# Test that ignorechars accepts various bytes-like objects
self.assertEqual(a2b(type2test(b'YWJj\n'), ignorechars=bytearray(b'\n')), b'abc')

def test_base64errors(self):
# Test base64 with invalid padding
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Add the *ignorechars* parameter to :func:`binascii.a2b_base64` and
:func:`base64.b64decode`. When provided, only characters in this set are
silently ignored during decoding; any other non-base64 character raises
:exc:`binascii.Error`. This allows selective filtering of characters (e.g.,
ignoring whitespace while rejecting other invalid characters), similar to the
existing *ignorechars* parameter of :func:`base64.a85decode`.
56 changes: 52 additions & 4 deletions Modules/binascii.c
Original file line number Diff line number Diff line change
Expand Up @@ -477,17 +477,24 @@ binascii.a2b_base64
/
*
strict_mode: bool = False
ignorechars: object = None

Decode a line of base64 data.

strict_mode
When set to True, bytes that are not part of the base64 standard are not allowed.
The same applies to excess data after padding (= / ==).
ignorechars
A bytes-like object specifying characters to ignore during decoding.
When provided, only characters in this set will be silently ignored;
other non-base64 characters will cause an error. When None (the default),
all non-base64 characters are silently ignored (unless strict_mode is True).
[clinic start generated code]*/

static PyObject *
binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
/*[clinic end generated code: output=5409557788d4f975 input=13c797187acc9c40]*/
binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
PyObject *ignorechars)
/*[clinic end generated code: output=7d2b92b6f1de3ccc input=485946ff2e8960c6]*/
{
assert(data->len >= 0);

Expand All @@ -496,10 +503,30 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
binascii_state *state = NULL;
char padding_started = 0;

/* Handle ignorechars parameter */
Py_buffer ignorechars_buf = {0};
int has_ignorechars = 0;
unsigned char ignorechars_table[256] = {0}; /* Lookup table for ignored chars */

if (ignorechars != Py_None) {
if (PyObject_GetBuffer(ignorechars, &ignorechars_buf, PyBUF_SIMPLE) < 0) {
return NULL;
}
has_ignorechars = 1;
/* Build lookup table for O(1) character checking */
const unsigned char *ic = (const unsigned char *)ignorechars_buf.buf;
for (Py_ssize_t j = 0; j < ignorechars_buf.len; j++) {
ignorechars_table[ic[j]] = 1;
}
}

/* Allocate the buffer */
Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
PyBytesWriter *writer = PyBytesWriter_Create(bin_len);
if (writer == NULL) {
if (has_ignorechars) {
PyBuffer_Release(&ignorechars_buf);
}
return NULL;
}
unsigned char *bin_data = PyBytesWriter_GetData(writer);
Expand All @@ -517,8 +544,9 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
/* Fast path: use optimized decoder for complete quads.
* This works for both strict and non-strict mode for valid input.
* The fast path stops at padding, invalid chars, or incomplete groups.
* Skip fast path when ignorechars is provided, as we need to check each char.
*/
if (ascii_len >= 4) {
if (ascii_len >= 4 && !has_ignorechars) {
Py_ssize_t fast_chars = base64_decode_fast(ascii_data, (Py_ssize_t)ascii_len,
bin_data, table_a2b_base64);
if (fast_chars > 0) {
Expand All @@ -533,6 +561,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
int pads = 0;
for (; i < ascii_len; i++) {
unsigned char this_ch = ascii_data[i];
unsigned char orig_ch = this_ch; /* Save original for ignorechars check */

/* Check for pad sequences and ignore
** the invalid ones.
Expand Down Expand Up @@ -567,7 +596,20 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)

this_ch = table_a2b_base64[this_ch];
if (this_ch >= 64) {
if (strict_mode) {
/* Non-base64 character found */
if (has_ignorechars) {
/* When ignorechars is provided, only skip if char is in the set */
if (ignorechars_table[orig_ch]) {
continue; /* Character is in ignorechars, skip it */
}
/* Character not in ignorechars, raise error */
state = get_binascii_state(module);
if (state) {
PyErr_SetString(state->Error, "Only base64 data is allowed");
}
goto error_end;
}
else if (strict_mode) {
state = get_binascii_state(module);
if (state) {
PyErr_SetString(state->Error, "Only base64 data is allowed");
Expand Down Expand Up @@ -634,9 +676,15 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
}

done:
if (has_ignorechars) {
PyBuffer_Release(&ignorechars_buf);
}
return PyBytesWriter_FinishWithPointer(writer, bin_data);

error_end:
if (has_ignorechars) {
PyBuffer_Release(&ignorechars_buf);
}
PyBytesWriter_Discard(writer);
return NULL;
}
Expand Down
Loading
Loading