From: Joel Holdsworth <jholdsworth@nvidia.com>
To: git@vger.kernel.org, Luke Diamand <luke@diamand.org>,
Junio C Hamano <gitster@pobox.com>
Cc: Tzadik Vanderhoof <tzadik.vanderhoof@gmail.com>,
Dorgon Chang <dorgonman@hotmail.com>,
Joachim Kuebart <joachim.kuebart@gmail.com>,
Daniel Levin <dendy.ua@gmail.com>,
Johannes Schindelin <johannes.schindelin@gmx.de>,
Ben Keene <seraphire@gmail.com>,
Andrew Oakley <andrew@adoakley.name>,
Joel Holdsworth <jholdsworth@nvidia.com>
Subject: [PATCH v2 RESEND 2/5] git-p4: pre-compile RCS keyword regexes
Date: Tue, 4 Jan 2022 12:49:10 +0000 [thread overview]
Message-ID: <20220104124913.2894-3-jholdsworth@nvidia.com> (raw)
In-Reply-To: <20220104124913.2894-1-jholdsworth@nvidia.com>
Previously git-p4.py would compile one of two regular expressions for
every RCS keyword-enabled file. This patch simplifies the code
by pre-compiling the two regular expressions when the script first
loads.
Signed-off-by: Joel Holdsworth <jholdsworth@nvidia.com>
---
git-p4.py | 48 ++++++++++++++++++------------------------------
1 file changed, 18 insertions(+), 30 deletions(-)
diff --git a/git-p4.py b/git-p4.py
index 226cdef424..0af83b9c72 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -56,6 +56,9 @@
p4_access_checked = False
+re_ko_keywords = re.compile(r'\$(Id|Header)(:[^$\n]+)?\$')
+re_k_keywords = re.compile(r'\$(Id|Header|Author|Date|DateTime|Change|File|Revision)(:[^$\n]+)?\$')
+
def p4_build_cmd(cmd):
"""Build a suitable p4 command line.
@@ -577,20 +580,12 @@ def p4_type(f):
#
def p4_keywords_regexp_for_type(base, type_mods):
if base in ("text", "unicode", "binary"):
- kwords = None
if "ko" in type_mods:
- kwords = 'Id|Header'
+ return re_ko_keywords
elif "k" in type_mods:
- kwords = 'Id|Header|Author|Date|DateTime|Change|File|Revision'
+ return re_k_keywords
else:
return None
- pattern = r"""
- \$ # Starts with a dollar, followed by...
- (%s) # one of the keywords, followed by...
- (:[^$\n]+)? # possibly an old expansion, followed by...
- \$ # another dollar
- """ % kwords
- return pattern
else:
return None
@@ -1753,15 +1748,13 @@ def prepareLogMessage(self, template, message, jobs):
return result
- def patchRCSKeywords(self, file, pattern):
- # Attempt to zap the RCS keywords in a p4 controlled file matching the given pattern
+ def patchRCSKeywords(self, file, regexp):
+ # Attempt to zap the RCS keywords in a p4 controlled file matching the given regex
(handle, outFileName) = tempfile.mkstemp(dir='.')
try:
with os.fdopen(handle, "w+") as outFile, open(file, "r") as inFile:
- regexp = re.compile(pattern, re.VERBOSE)
for line in inFile.readlines():
- line = regexp.sub(r'$\1$', line)
- outFile.write(line)
+ outFile.write(regexp.sub(r'$\1$', line))
# Forcibly overwrite the original file
os.unlink(file)
shutil.move(outFileName, file)
@@ -2088,25 +2081,22 @@ def applyCommit(self, id):
# the patch to see if that's possible.
if gitConfigBool("git-p4.attemptRCSCleanup"):
file = None
- pattern = None
kwfiles = {}
for file in editedFiles | filesToDelete:
# did this file's delta contain RCS keywords?
- pattern = p4_keywords_regexp_for_file(file)
-
- if pattern:
+ regexp = p4_keywords_regexp_for_file(file)
+ if regexp:
# this file is a possibility...look for RCS keywords.
- regexp = re.compile(pattern, re.VERBOSE)
for line in read_pipe_lines(["git", "diff", "%s^..%s" % (id, id), file]):
if regexp.search(line):
if verbose:
- print("got keyword match on %s in %s in %s" % (pattern, line, file))
- kwfiles[file] = pattern
+ print("got keyword match on %s in %s in %s" % (regexp.pattern, line, file))
+ kwfiles[file] = regexp
break
- for file in kwfiles:
+ for file, regexp in kwfiles.items():
if verbose:
- print("zapping %s with %s" % (line,pattern))
+ print("zapping %s with %s" % (line, regexp.pattern))
# File is being deleted, so not open in p4. Must
# disable the read-only bit on windows.
if self.isWindows and file not in editedFiles:
@@ -3026,12 +3016,10 @@ def streamOneP4File(self, file, contents):
# Note that we do not try to de-mangle keywords on utf16 files,
# even though in theory somebody may want that.
- pattern = p4_keywords_regexp_for_type(type_base, type_mods)
- if pattern:
- regexp = re.compile(pattern, re.VERBOSE)
- text = ''.join(decode_text_stream(c) for c in contents)
- text = regexp.sub(r'$\1$', text)
- contents = [ encode_text_stream(text) ]
+ regexp = p4_keywords_regexp_for_type(type_base, type_mods)
+ if regexp:
+ contents = [encode_text_stream(regexp.sub(
+ r'$\1$', ''.join(decode_text_stream(c) for c in contents)))]
if self.largeFileSystem:
(git_mode, contents) = self.largeFileSystem.processContent(git_mode, relPath, contents)
--
2.34.1
next prev parent reply other threads:[~2022-01-04 12:49 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-01-04 12:49 [PATCH v2 RESEND 0/5] git-p4: fix RCS keyword processing encoding errors Joel Holdsworth
2022-01-04 12:49 ` [PATCH v2 RESEND 1/5] git-p4: use with statements to close files after use in patchRCSKeywords Joel Holdsworth
2022-01-04 12:49 ` Joel Holdsworth [this message]
2022-01-04 12:49 ` [PATCH v2 RESEND 3/5] git-p4: add raw option to read_pipelines Joel Holdsworth
2022-01-04 12:49 ` [PATCH v2 RESEND 4/5] git-p4: open temporary patch file for write only Joel Holdsworth
2022-01-04 12:49 ` [PATCH v2 RESEND 5/5] git-p4: resolve RCS keywords in bytes not utf-8 Joel Holdsworth
2022-01-04 21:47 ` [PATCH v2 RESEND 0/5] git-p4: fix RCS keyword processing encoding errors Junio C Hamano
2022-01-04 21:59 ` Joel Holdsworth
2022-01-04 22:48 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220104124913.2894-3-jholdsworth@nvidia.com \
--to=jholdsworth@nvidia.com \
--cc=andrew@adoakley.name \
--cc=dendy.ua@gmail.com \
--cc=dorgonman@hotmail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=joachim.kuebart@gmail.com \
--cc=johannes.schindelin@gmx.de \
--cc=luke@diamand.org \
--cc=seraphire@gmail.com \
--cc=tzadik.vanderhoof@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.