[Git][reproducible-builds/diffoscope][master] 2 commits: add specialize_as(), use it to speed up .smali comparison in APKs
Chris Lamb (@lamby)
gitlab at salsa.debian.org
Thu Aug 31 15:50:56 UTC 2023
Chris Lamb pushed to branch master at Reproducible Builds / diffoscope
Commits:
fe513c02 by FC Stegerman at 2023-08-31T08:29:10-07:00
add specialize_as(), use it to speed up .smali comparison in APKs
- - - - -
1f8d9e17 by Chris Lamb at 2023-08-31T08:50:13-07:00
Add documentation for the new specialize_as, and expand the documentation of `specialize` too. (Re: reproducible-builds/diffoscope!108)
- - - - -
2 changed files:
- diffoscope/comparators/apk.py
- diffoscope/comparators/utils/specialize.py
Changes:
=====================================
diffoscope/comparators/apk.py
=====================================
@@ -35,9 +35,11 @@ from diffoscope.tools import (
)
from diffoscope.tempfiles import get_temporary_directory
-from .utils.archive import Archive
+from .text import TextFile
+from .utils.archive import Archive, ArchiveMember
from .utils.command import Command
from .utils.compare import compare_files
+from .utils.specialize import specialize_as
from .zip import ZipContainer, zipinfo_differences, ZipFileBase
from .missing_file import MissingFile
@@ -157,6 +159,14 @@ class ApkContainer(Archive):
def get_member_names(self):
return self._members
+ def get_member(self, member_name):
+ member = ArchiveMember(self, member_name)
+ if member_name.endswith(".smali") and member_name.startswith("smali"):
+ # smali{,_classesN}/**/*.smali files from apktool are always text,
+ # and using libmagic on thousands of these files takes minutes
+ return specialize_as(TextFile, member)
+ return member
+
def extract(self, member_name, dest_dir):
return os.path.join(self._tmpdir.name, member_name)
=====================================
diffoscope/comparators/utils/specialize.py
=====================================
@@ -28,9 +28,6 @@ logger = logging.getLogger(__name__)
def try_recognize(file, cls, recognizes):
- if isinstance(file, cls):
- return True
-
# Does this file class match?
with profile("recognizes", file):
# logger.debug("trying %s on %s", cls, file)
@@ -43,17 +40,40 @@ def try_recognize(file, cls, recognizes):
format_class(cls, strip="diffoscope.comparators."),
file.name,
)
- new_cls = type(cls.__name__, (cls, type(file)), {})
- file.__class__ = new_cls
+ specialize_as(cls, file)
return True
+def specialize_as(cls, file):
+ """
+ Sometimes it is near-certain that files within a Container with a given
+ extension (say) are of a known File type. We therefore do not need to run
+ libmagic on these files, especially in cases where the Container contains
+ hundreds of similar/small files. (This can be seen in the case of apktool
+ and .smali files). In this case, this method can be used to essentially
+ fix/force the type. Care should naturally be taken within Container
+ implementations; such as checking the file extension and so forth.
+ """
+
+ new_cls = type(cls.__name__, (cls, type(file)), {})
+ file.__class__ = new_cls
+ return file
+
+
def specialize(file):
+ # If we already know the class (i.e. via `specialize_as`), then we do not
+ # need to run `File.recognizes` at all.
+ for cls in ComparatorManager().classes:
+ if isinstance(file, cls):
+ return file
+
+ # Run the usual `File.recognizes` implementation.
for cls in ComparatorManager().classes:
if try_recognize(file, cls, cls.recognizes):
return file
+ # If there are no matches, run the fallback implementation.
for cls in ComparatorManager().classes:
if try_recognize(file, cls, cls.fallback_recognizes):
logger.debug(
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/bf334e1d83d5c1a0ccaa0da4f2d8ecaeb2f1b84d...1f8d9e17b01c12c773ad8ba9e93458b054654bc7
--
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/bf334e1d83d5c1a0ccaa0da4f2d8ecaeb2f1b84d...1f8d9e17b01c12c773ad8ba9e93458b054654bc7
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20230831/06178326/attachment.htm>
More information about the rb-commits
mailing list