Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 25 additions & 4 deletions Lib/tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -1277,6 +1277,20 @@ def _create_pax_generic_header(cls, pax_headers, type, encoding):
@classmethod
def frombuf(cls, buf, encoding, errors):
"""Construct a TarInfo object from a 512-byte bytes object.

This delegates to ``_frombuf()`` and leaves ``dircheck`` at its
default of ``True``. To support the old v7 tar format, ``AREGTYPE``
headers are therefore transformed to ``DIRTYPE`` headers if their
name ends in '/'.
"""
return cls._frombuf(buf, encoding, errors)

@classmethod
def _frombuf(cls, buf, encoding, errors, *, dircheck=True):
"""Construct a TarInfo object from a 512 byte bytes object.

If ``dircheck`` is set to ``True`` then ``AREGTYPE`` headers will
be normalized to ``DIRTYPE`` if the name ends in a trailing slash.
``dircheck`` must be set to ``False`` if this function is called
on a follow-up header such as ``GNUTYPE_LONGNAME``.
"""
if len(buf) == 0:
raise EmptyHeaderError("empty header")
Expand Down Expand Up @@ -1307,7 +1321,7 @@ def frombuf(cls, buf, encoding, errors):

# Old V7 tar format represents a directory as a regular
# file with a trailing slash.
if obj.type == AREGTYPE and obj.name.endswith("/"):
if dircheck and obj.type == AREGTYPE and obj.name.endswith("/"):
obj.type = DIRTYPE

# The old GNU sparse format occupies some of the unused
Expand Down Expand Up @@ -1342,8 +1356,15 @@ def fromtarfile(cls, tarfile):
"""Return the next TarInfo object from TarFile object
tarfile.
"""
return cls._fromtarfile(tarfile)

@classmethod
def _fromtarfile(cls, tarfile, *, dircheck=True):
"""
Read one BLOCKSIZE block from ``tarfile.fileobj``, build a TarInfo
from it and hand it to ``_proc_member()``.

``dircheck`` is forwarded unchanged to ``_frombuf()``.
See dircheck documentation in _frombuf().
"""
buf = tarfile.fileobj.read(BLOCKSIZE)
# NOTE(review): this listing is a diff with its +/- markers stripped, so
# the removed line and its replacement appear back to back. The
# ``cls.frombuf(...)`` call looks like the pre-change line and the
# ``cls._frombuf(..., dircheck=dircheck)`` call like the one that remains
# after the change -- confirm against the actual file.
obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
obj = cls._frombuf(buf, tarfile.encoding, tarfile.errors, dircheck=dircheck)
# Current position minus one block, i.e. where the block just read
# started (assuming a full BLOCKSIZE block was consumed).
obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
return obj._proc_member(tarfile)

Expand Down Expand Up @@ -1401,7 +1422,7 @@ def _proc_gnulong(self, tarfile):

# Fetch the next header and process it.
try:
next = self.fromtarfile(tarfile)
next = self._fromtarfile(tarfile, dircheck=False)
except HeaderError as e:
raise SubsequentHeaderError(str(e)) from None

Expand Down Expand Up @@ -1536,7 +1557,7 @@ def _proc_pax(self, tarfile):

# Fetch the next header.
try:
next = self.fromtarfile(tarfile)
next = self._fromtarfile(tarfile, dircheck=False)
except HeaderError as e:
raise SubsequentHeaderError(str(e)) from None

Expand Down
19 changes: 19 additions & 0 deletions Lib/test/test_tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -1234,6 +1234,25 @@ def test_longname_directory(self):
self.assertIsNotNone(tar.getmember(longdir))
self.assertIsNotNone(tar.getmember(longdir.removesuffix('/')))

def test_longname_file_not_directory(self):
# Test reading a longname file and ensure it is not handled as a directory.
# Issue #141707
buf = io.BytesIO()
with tarfile.open(mode='w', fileobj=buf, format=self.format) as tar:
ti = tarfile.TarInfo()
ti.type = tarfile.AREGTYPE
# 99 'a' characters, a slash, then 'bbb': 103 characters in total, with
# the '/' at index 99.
# NOTE(review): presumably chosen so that the name truncated to the
# 100-byte header name field ends in '/', which is the case the
# AREGTYPE -> DIRTYPE normalization would misfire on -- confirm.
ti.name = ('a' * 99) + '/' + ('b' * 3)
tar.addfile(ti)

# Snapshot of name -> type as written, to compare with what is read back.
expected = {t.name: t.type for t in tar.getmembers()}

# Rewind so the archive that was just written can be read from the start.
buf.seek(0)
with tarfile.open(mode='r', fileobj=buf) as tar:
actual = {t.name: t.type for t in tar.getmembers()}

# The member must come back with the same name and an unchanged type.
self.assertEqual(expected, actual)


class GNUReadTest(LongnameTest, ReadTest, unittest.TestCase):

subdir = "gnu"
Expand Down
1 change: 1 addition & 0 deletions Misc/ACKS
Original file line number Diff line number Diff line change
Expand Up @@ -1544,6 +1544,7 @@ Ashwin Ramaswami
Jeff Ramnani
Grant Ramsay
Bayard Randel
Eashwar Ranganathan
Varpu Rantala
Brodie Rao
Rémi Rampin
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Don't change the :class:`tarfile.TarInfo` type from ``AREGTYPE`` to ``DIRTYPE`` when parsing
the header that follows a GNU long name, GNU long link, or pax extended header.
Loading