summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPhilip Withnall <philip.withnall@collabora.co.uk>2016-08-08 17:34:57 +0100
committerThibault Saunier <tsaunier@gnome.org>2016-08-09 14:37:47 -0400
commitea6b591d6ae4e3e57cb4985e823e8e7d2700dae3 (patch)
treec668542da9b06e67359f0b23228737c87ee57d9d
parenta42d52a20098860c367df07d83f85d1a555d6ed6 (diff)
git-phab: Fix content type detection for binary files
The mime_type field of GitPython’s Blob object is unreliable: it detects the type based entirely on the blob’s filename, which results in weird types for files like ‘configure.ac’. Instead, since we have the data available, we can check whether any of the bytes in the file fall outside the set of text characters (printable bytes, common whitespace and a few control codes; bytes 0x80–0xFF count as text), which is precisely when we should use a binary diff. Signed-off-by: Philip Withnall <philip.withnall@collabora.co.uk> Differential Revision: https://phabricator.freedesktop.org/D1258
-rwxr-xr-xgit-phab26
1 files changed, 20 insertions, 6 deletions
diff --git a/git-phab b/git-phab
index 910a8c2..3292f26 100755
--- a/git-phab
+++ b/git-phab
@@ -761,6 +761,19 @@ Paste API Token from that page and press <enter>: """ % self.phabricator_uri)
761 761
762 return subprocess.check_output(command).decode("utf-8") 762 return subprocess.check_output(command).decode("utf-8")
763 763
def blob_is_binary(self, blob):
    """Return True if *blob*'s content looks like binary data.

    A missing (falsy) blob is reported as not binary. Otherwise the whole
    blob is read and classified by a byte-level heuristic: the content is
    binary as soon as it contains one byte outside the "text" set defined
    below.

    :param blob: object whose ``data_stream[-1]`` exposes a ``read()``
        returning the content as ``bytes``, or a falsy value when there is
        no blob on that side of the diff.
    :returns: ``True`` when at least one non-text byte is present,
        ``False`` for text, empty or missing blobs.
    """
    if not blob:
        return False

    # The last element of the data stream is the readable content stream.
    data = blob.data_stream[-1].read()

    # The mime_type field of a gitpython blob is based only on its filename
    # which means that files like 'configure.ac' will return weird MIME
    # types, unsuitable for working out whether they are text. Instead,
    # inspect the content itself.
    #
    # Bytes considered text: BEL, BS, TAB, LF, FF, CR, ESC, plus everything
    # from 0x20 to 0xff except DEL (0x7f). High bytes (0x80-0xff) are
    # deliberately accepted so UTF-8 and Latin-1 text is not misdetected;
    # only the remaining control bytes (NUL, DEL, ...) mark a blob as
    # binary.
    text_bytes = bytearray({7, 8, 9, 10, 12, 13, 27} |
                           (set(range(0x20, 0x100)) - {0x7f}))

    # Deleting every text byte leaves only the non-text ones; any leftover
    # means the blob is binary.
    return bool(data.translate(None, text_bytes))
776
764 def get_changes_for_diff(self, diff): 777 def get_changes_for_diff(self, diff):
765 def file_len(fname): 778 def file_len(fname):
766 i = 0 779 i = 0
@@ -813,8 +826,8 @@ Paste API Token from that page and press <enter>: """ % self.phabricator_uri)
813 elif l.startswith("-"): 826 elif l.startswith("-"):
814 removed_lines += 1 827 removed_lines += 1
815 828
816 is_text = diff.b_blob.mime_type.startswith( 829 is_text = (not self.blob_is_binary(diff.a_blob) and
817 "text/") if diff.b_blob else True 830 not self.blob_is_binary(diff.b_blob))
818 if is_text: 831 if is_text:
819 if diff.deleted_file: 832 if diff.deleted_file:
820 file_length = 0 833 file_length = 0
@@ -936,8 +949,8 @@ Paste API Token from that page and press <enter>: """ % self.phabricator_uri)
936 diffs = self.get_git_diffs(commit) 949 diffs = self.get_git_diffs(commit)
937 has_binary = False 950 has_binary = False
938 for d in diffs: 951 for d in diffs:
939 if d.b_blob and not d.b_blob.mime_type.startswith( 952 if d.b_blob and \
940 "text/"): 953 self.blob_is_binary(d.b_blob):
941 has_binary = True 954 has_binary = True
942 break 955 break
943 956
@@ -989,7 +1002,8 @@ Paste API Token from that page and press <enter>: """ % self.phabricator_uri)
989 1002
990 phab_fields.append("Projects: %s" % ','.join(self.project_phids)) 1003 phab_fields.append("Projects: %s" % ','.join(self.project_phids))
991 1004
992 summary = ('\n'.join(body) + '\n' + '\n'.join(git_fields)).strip('\r\n') 1005 summary = ('\n'.join(body) + '\n' +
1006 '\n'.join(git_fields)).strip('\r\n')
993 1007
994 revision_id = self.get_differential_id(self.repo.head.commit) 1008 revision_id = self.get_differential_id(self.repo.head.commit)
995 if revision_id: 1009 if revision_id:
@@ -1718,7 +1732,7 @@ Paste API Token from that page and press <enter>: """ % self.phabricator_uri)
1718 git_fields.append(field) 1732 git_fields.append(field)
1719 1733
1720 msg = self.format_commit_msg(subject, body, git_fields, 1734 msg = self.format_commit_msg(subject, body, git_fields,
1721 phab_fields, True) 1735 phab_fields, True)
1722 self.repo.git.commit(amend=True, message=msg) 1736 self.repo.git.commit(amend=True, message=msg)
1723 1737
1724 orig_branch.commit = self.repo.head.commit 1738 orig_branch.commit = self.repo.head.commit