diff --git a/cvs2svn_rcsparse/common.py b/cvs2svn_rcsparse/common.py index d99ce97cb..c368986ae 100644 --- a/cvs2svn_rcsparse/common.py +++ b/cvs2svn_rcsparse/common.py @@ -1,6 +1,6 @@ # -*-python-*- # -# Copyright (C) 1999-2014 The ViewCVS Group. All Rights Reserved. +# Copyright (C) 1999-2020 The ViewCVS Group. All Rights Reserved. # # By using this file, you agree to the terms and conditions set forth in # the LICENSE.html file which can be found at the top level of the ViewVC @@ -14,6 +14,10 @@ import calendar import string +import sys +PY3 = (sys.version_info[0] >= 3) + +DIGITS = string.digits.encode('ascii') if PY3 else string.digits class Sink: """Interface to be implemented by clients. The RCS parser calls this as @@ -118,7 +122,7 @@ def set_comment(self, comment): Parameter: COMMENT is a string containing the comment. This may be multi-line. - This field does not seem to be used by CVS. + This field does not seem to be used by CVS. """ pass @@ -256,7 +260,7 @@ def _read_until_semicolon(self): while 1: token = self.ts.get() - if token == ';': + if token == b';': break tokens.append(token) @@ -264,19 +268,23 @@ def _read_until_semicolon(self): def _parse_admin_head(self, token): rev = self.ts.get() - if rev == ';': + if rev == b';': # The head revision is not specified. Just drop the semicolon # on the floor. 
pass else: + if PY3: + rev = rev.decode('ascii', 'surrogateescape') self.sink.set_head_revision(rev) - self.ts.match(';') + self.ts.match(b';') def _parse_admin_branch(self, token): branch = self.ts.get() - if branch != ';': + if branch != b';': + if PY3: + branch = branch.decode('ascii', 'surrogateescape') self.sink.set_principal_branch(branch) - self.ts.match(';') + self.ts.match(b';') def _parse_admin_access(self, token): accessors = self._read_until_semicolon() @@ -286,44 +294,55 @@ def _parse_admin_access(self, token): def _parse_admin_symbols(self, token): while 1: tag_name = self.ts.get() - if tag_name == ';': + if tag_name == b';': break - self.ts.match(':') + self.ts.match(b':') tag_rev = self.ts.get() + if PY3: + tag_name = tag_name.decode('ascii', 'surrogateescape') + tag_rev = tag_rev.decode('ascii', 'surrogateescape') self.sink.define_tag(tag_name, tag_rev) def _parse_admin_locks(self, token): while 1: locker = self.ts.get() - if locker == ';': + if locker == b';': break - self.ts.match(':') + self.ts.match(b':') rev = self.ts.get() + if PY3: + rev = rev.decode('ascii', 'surrogateescape') + locker = locker.decode('ascii', 'surrogateescape') self.sink.set_locker(rev, locker) def _parse_admin_strict(self, token): self.sink.set_locking("strict") - self.ts.match(';') + self.ts.match(b';') def _parse_admin_comment(self, token): - self.sink.set_comment(self.ts.get()) - self.ts.match(';') + if PY3: + self.sink.set_comment(self.ts.get().decode('ascii', 'surrogateescape')) + else: + self.sink.set_comment(self.ts.get()) + self.ts.match(b';') def _parse_admin_expand(self, token): expand_mode = self.ts.get() + if PY3: + expand_mode = expand_mode.decode('ascii', 'surrogateescape') self.sink.set_expansion(expand_mode) - self.ts.match(';') + self.ts.match(b';') admin_token_map = { - 'head' : _parse_admin_head, - 'branch' : _parse_admin_branch, - 'access' : _parse_admin_access, - 'symbols' : _parse_admin_symbols, - 'locks' : _parse_admin_locks, - 'strict' : 
_parse_admin_strict, - 'comment' : _parse_admin_comment, - 'expand' : _parse_admin_expand, - 'desc' : None, + b'head' : _parse_admin_head, + b'branch' : _parse_admin_branch, + b'access' : _parse_admin_access, + b'symbols' : _parse_admin_symbols, + b'locks' : _parse_admin_locks, + b'strict' : _parse_admin_strict, + b'comment' : _parse_admin_comment, + b'expand' : _parse_admin_expand, + b'desc' : None, } def parse_rcs_admin(self): @@ -335,13 +354,13 @@ def parse_rcs_admin(self): f = self.admin_token_map[token] except KeyError: # We're done once we reach the description of the RCS tree - if token[0] in string.digits: + if token[0] in DIGITS: self.ts.unget(token) return else: # Chew up "newphrase" # warn("Unexpected RCS token: $token\n") - while self.ts.get() != ';': + while self.ts.get() != b';': pass else: if f is None: @@ -352,54 +371,57 @@ def parse_rcs_admin(self): def _parse_rcs_tree_entry(self, revision): # Parse date - self.ts.match('date') + self.ts.match(b'date') date = self.ts.get() - self.ts.match(';') + self.ts.match(b';') # Convert date into standard UNIX time format (seconds since epoch) - date_fields = string.split(date, '.') + if PY3: + date_fields = date.decode('ascii','surrogateescape').split('.') + else: + date_fields = date.split('.') # According to rcsfile(5): the year "contains just the last two # digits of the year for years from 1900 through 1999, and all the # digits of years thereafter". 
if len(date_fields[0]) == 2: date_fields[0] = '19' + date_fields[0] - date_fields = map(string.atoi, date_fields) + date_fields = [int(x) for x in date_fields] EPOCH = 1970 if date_fields[0] < EPOCH: - raise ValueError, 'invalid year for revision %s' % (revision,) + raise ValueError('invalid year for revision %s' % (revision,)) try: timestamp = calendar.timegm(tuple(date_fields) + (0, 0, 0,)) - except ValueError, e: - raise ValueError, 'invalid date for revision %s: %s' % (revision, e,) + except ValueError as e: + raise ValueError('invalid date for revision %s: %s' % (revision, e,)) # Parse author ### NOTE: authors containing whitespace are violations of the ### RCS specification. We are making an allowance here because ### CVSNT is known to produce these sorts of authors. - self.ts.match('author') - author = ' '.join(self._read_until_semicolon()) + self.ts.match(b'author') + author = b' '.join(self._read_until_semicolon()) # Parse state - self.ts.match('state') - state = '' + self.ts.match(b'state') + state = b'' while 1: token = self.ts.get() - if token == ';': + if token == b';': break - state = state + token + ' ' + state = state + token + b' ' state = state[:-1] # toss the trailing space # Parse branches - self.ts.match('branches') + self.ts.match(b'branches') branches = self._read_until_semicolon() # Parse revision of next delta in chain - self.ts.match('next') + self.ts.match(b'next') next = self.ts.get() - if next == ';': + if next == b';': next = None else: - self.ts.match(';') + self.ts.match(b';') # there are some files with extra tags in them. for example: # owner 640; @@ -410,12 +432,21 @@ def _parse_rcs_tree_entry(self, revision): # this is "newphrase" in RCSFILE(5). we just want to skip over these. 
while 1: token = self.ts.get() - if token == 'desc' or token[0] in string.digits: + if token == b'desc' or token[0] in DIGITS: self.ts.unget(token) break # consume everything up to the semicolon self._read_until_semicolon() + if PY3: + revision = revision.decode('ascii', 'surrogateescape') + if author is not None: + author = author.decode('ascii', 'surrogateescape') + if state is not None: + state = state.decode('ascii', 'surrogateescape') + branches = [b.decode('ascii', 'surrogateescape') for b in branches] + if next is not None: + next = next.decode('ascii', 'surrogateescape') self.sink.define_revision(revision, timestamp, author, state, branches, next) @@ -424,15 +455,19 @@ def parse_rcs_tree(self): revision = self.ts.get() # End of RCS tree description ? - if revision == 'desc': + if revision == b'desc': self.ts.unget(revision) return self._parse_rcs_tree_entry(revision) def parse_rcs_description(self): - self.ts.match('desc') - self.sink.set_description(self.ts.get()) + self.ts.match(b'desc') + if PY3: + self.sink.set_description(self.ts.get().decode('ascii', + 'surrogateescape')) + else: + self.sink.set_description(self.ts.get()) def parse_rcs_deltatext(self): while 1: @@ -441,12 +476,16 @@ def parse_rcs_deltatext(self): # EOF break text, sym2, log, sym1 = self.ts.mget(4) - if sym1 != 'log': - print `text[:100], sym2[:100], log[:100], sym1[:100]` - raise RCSExpected(sym1, 'log') - if sym2 != 'text': - raise RCSExpected(sym2, 'text') + if sym1 != b'log': + print(repr((text[:100], sym2[:100], log[:100], sym1[:100]))) + raise RCSExpected(sym1, b'log') + if sym2 != b'text': + raise RCSExpected(sym2, b'text') ### need to add code to chew up "newphrase" + if PY3: + revision = revision.decode('ascii', 'surrogateescape') + log = log.decode('ascii', 'surrogateescape') + text = text.decode('ascii', 'surrogateescape') self.sink.set_revision_info(revision, log, text) def parse(self, file, sink): diff --git a/cvs2svn_rcsparse/debug.py b/cvs2svn_rcsparse/debug.py index 
c0f693fe9..4665abbb3 100644 --- a/cvs2svn_rcsparse/debug.py +++ b/cvs2svn_rcsparse/debug.py @@ -1,6 +1,6 @@ # -*-python-*- # -# Copyright (C) 1999-2014 The ViewCVS Group. All Rights Reserved. +# Copyright (C) 1999-2020 The ViewCVS Group. All Rights Reserved. # # By using this file, you agree to the terms and conditions set forth in # the LICENSE.html file which can be found at the top level of the ViewVC @@ -14,39 +14,39 @@ import time -from __init__ import parse -import common +from .__init__ import parse +from . import common class DebugSink(common.Sink): def set_head_revision(self, revision): - print 'head:', revision + print('head:', revision) def set_principal_branch(self, branch_name): - print 'branch:', branch_name + print('branch:', branch_name) def define_tag(self, name, revision): - print 'tag:', name, '=', revision + print('tag:', name, '=', revision) def set_comment(self, comment): - print 'comment:', comment + print('comment:', comment) def set_description(self, description): - print 'description:', description + print('description:', description) def define_revision(self, revision, timestamp, author, state, branches, next): - print 'revision:', revision - print ' timestamp:', timestamp - print ' author:', author - print ' state:', state - print ' branches:', branches - print ' next:', next + print('revision:', revision) + print(' timestamp:', timestamp) + print(' author:', author) + print(' state:', state) + print(' branches:', branches) + print(' next:', next) def set_revision_info(self, revision, log, text): - print 'revision:', revision - print ' log:', log - print ' text:', text[:100], '...' 
+ print('revision:', revision) + print(' log:', log) + print(' text:', text[:100], '...') class DumpSink(common.Sink): @@ -62,32 +62,32 @@ def __init__(self): import sha def set_head_revision(self, revision): - print revision + print(revision) def set_principal_branch(self, branch_name): - print branch_name + print(branch_name) def define_tag(self, name, revision): - print name, revision + print(name, revision) def set_comment(self, comment): - print comment + print(comment) def set_description(self, description): - print description + print(description) def define_revision(self, revision, timestamp, author, state, branches, next): - print revision, timestamp, author, state, branches, next + print(revision, timestamp, author, state, branches, next) def set_revision_info(self, revision, log, text): - print revision, sha.new(log).hexdigest(), sha.new(text).hexdigest() + print(revision, sha.new(log).hexdigest(), sha.new(text).hexdigest()) def tree_completed(self): - print 'tree_completed' + print('tree_completed') def parse_completed(self): - print 'parse_completed' + print('parse_completed') def dump_file(fname): @@ -99,15 +99,15 @@ def time_file(fname): t = time.time() parse(f, s) t = time.time() - t - print t + print(t) def _usage(): - print 'This is normally a module for importing, but it has a couple' - print 'features for testing as an executable script.' 
- print 'USAGE: %s COMMAND filename,v' % sys.argv[0] - print ' where COMMAND is one of:' - print ' dump: filename is "dumped" to stdout' - print ' time: filename is parsed with the time written to stdout' + print('This is normally a module for importing, but it has a couple') + print('features for testing as an executable script.') + print('USAGE: %s COMMAND filename,v' % sys.argv[0]) + print(' where COMMAND is one of:') + print(' dump: filename is "dumped" to stdout') + print(' time: filename is parsed with the time written to stdout') sys.exit(1) if __name__ == '__main__': diff --git a/cvs2svn_rcsparse/default.py b/cvs2svn_rcsparse/default.py index 24825e9a6..750f1ceca 100644 --- a/cvs2svn_rcsparse/default.py +++ b/cvs2svn_rcsparse/default.py @@ -1,6 +1,6 @@ # -*-python-*- # -# Copyright (C) 1999-2014 The ViewCVS Group. All Rights Reserved. +# Copyright (C) 1999-2020 The ViewCVS Group. All Rights Reserved. # # By using this file, you agree to the terms and conditions set forth in # the LICENSE.html file which can be found at the top level of the ViewVC @@ -15,11 +15,19 @@ # # ----------------------------------------------------------------------- +import sys import string -import common +from . import common + +if sys.version_info[0] >= 3: + PY3 = True + WHITESPACE = string.whitespace.encode('ascii') +else: + PY3 = False + WHITESPACE = string.whitespace class _TokenStream: - token_term = string.whitespace + ";:" + token_term = WHITESPACE + b";:" try: token_term = frozenset(token_term) except NameError: @@ -36,8 +44,8 @@ def __init__(self, file): self.rcsfile = file self.idx = 0 self.buf = self.rcsfile.read(self.CHUNK_SIZE) - if self.buf == '': - raise RuntimeError, 'EOF' + if self.buf == b'': + raise RuntimeError('EOF') def get(self): "Get the next token from the RCS file." 
@@ -54,26 +62,26 @@ def get(self): while 1: if idx == lbuf: buf = self.rcsfile.read(self.CHUNK_SIZE) - if buf == '': + if buf == b'': # signal EOF by returning None as the token del self.buf # so we fail if get() is called again return None lbuf = len(buf) idx = 0 - if buf[idx] not in string.whitespace: + if buf[idx] not in WHITESPACE: break idx = idx + 1 - if buf[idx] in ';:': + if buf[idx] in b';:': self.buf = buf self.idx = idx + 1 - return buf[idx] + return buf[idx:idx+1] - if buf[idx] != '@': + if buf[idx:idx+1] != b'@': end = idx + 1 - token = '' + token = b'' while 1: # find token characters in the current buffer while end < lbuf and buf[end] not in self.token_term: @@ -87,7 +95,7 @@ def get(self): # we stopped at the end of the buffer, so we may have a partial token buf = self.rcsfile.read(self.CHUNK_SIZE) - if buf == '': + if buf == b'': # signal EOF by returning None as the token del self.buf # so we fail if get() is called again return None @@ -108,10 +116,10 @@ def get(self): if idx == lbuf: idx = 0 buf = self.rcsfile.read(self.CHUNK_SIZE) - if buf == '': - raise RuntimeError, 'EOF' + if buf == b'': + raise RuntimeError('EOF') lbuf = len(buf) - i = string.find(buf, '@', idx) + i = buf.find(b'@', idx) if i == -1: chunks.append(buf[idx:]) idx = lbuf @@ -119,12 +127,12 @@ def get(self): if i == lbuf - 1: chunks.append(buf[idx:i]) idx = 0 - buf = '@' + self.rcsfile.read(self.CHUNK_SIZE) - if buf == '@': - raise RuntimeError, 'EOF' + buf = b'@' + self.rcsfile.read(self.CHUNK_SIZE) + if buf == b'@': + raise RuntimeError('EOF') lbuf = len(buf) continue - if buf[i + 1] == '@': + if buf[i + 1:i + 2] == b'@': chunks.append(buf[idx:i+1]) idx = i + 2 continue @@ -134,12 +142,15 @@ def get(self): self.buf = buf self.idx = i + 1 - return string.join(chunks, '') + return b''.join(chunks) # _get = get # def get(self): token = self._get() - print 'T:', `token` + if PY3: + print('T:', repr(token.decode('ascii','surrogateescape'))) + else: + print('T:', repr(token)) 
return token def match(self, match): diff --git a/cvs2svn_rcsparse/parse_rcs_file.py b/cvs2svn_rcsparse/parse_rcs_file.py index 05ff5b07b..1adc2a278 100755 --- a/cvs2svn_rcsparse/parse_rcs_file.py +++ b/cvs2svn_rcsparse/parse_rcs_file.py @@ -53,11 +53,15 @@ def __getattr__(self, name): if __name__ == '__main__': # Since there is nontrivial logic in __init__.py, we have to import - # parse() via that file. First make sure that the directory + # parse() via that file. However, __init__.py uses relative import + # for the package now, so we must import it as a package: # containing this script is in the path: - sys.path.insert(0, os.path.dirname(sys.argv[0])) - from __init__ import parse + p_dir, p_name = os.path.split(os.path.dirname(os.path.abspath(sys.argv[0]))) + sys.path.insert(0, p_dir) + rcsparse = __import__(p_name) + + parse = rcsparse.parse if sys.argv[1:]: for path in sys.argv[1:]: diff --git a/cvs2svn_rcsparse/run-tests.py b/cvs2svn_rcsparse/run-tests.py index caa0fdce5..82c7f719e 100755 --- a/cvs2svn_rcsparse/run-tests.py +++ b/cvs2svn_rcsparse/run-tests.py @@ -13,7 +13,7 @@ # # This software consists of voluntary contributions made by many # individuals. For exact contribution history, see the revision -# history and logs, available at http://viewvc.tigris.org/. +# history and logs, available at https://github.com/viewvc/viewvc/. # ==================================================================== """Run tests of rcsparse code.""" @@ -21,7 +21,10 @@ import sys import os import glob -from cStringIO import StringIO +if sys.version_info[0] >= 3: + from io import StringIO +else: + from cStringIO import StringIO from difflib import Differ # Since there is nontrivial logic in __init__.py, we have to import @@ -29,10 +32,20 @@ # containing this script is in the path: script_dir = os.path.dirname(sys.argv[0]) sys.path.insert(0, script_dir) +# Since there is nontrivial logic in __init__.py, we have to import +# parse() via that file. 
However, __init__.py uses relative imports +# for the package now, so we must import it as a package, which means +# the parent of the directory containing this script must be in the path: +p_dir, p_name = os.path.split(os.path.dirname(os.path.abspath(sys.argv[0]))) +sys.path.insert(0, p_dir) +script_dir = os.path.dirname(sys.argv[0]) -from __init__ import parse -from parse_rcs_file import LoggingSink +#from __init__ import parse +rcsparse = __import__(p_name) +parse = rcsparse.parse +sys.path.insert(0, script_dir) +from parse_rcs_file import LoggingSink test_dir = os.path.join(script_dir, 'test-data') @@ -46,14 +59,15 @@ f = StringIO() try: parse(open(filename, 'rb'), LoggingSink(f)) - except Exception, e: + except Exception as e: sys.stderr.write('Error parsing file: %s!\n' % (e,)) + raise all_tests_ok = 0 else: output = f.getvalue() expected_output_filename = filename[:-2] + '.out' - expected_output = open(expected_output_filename, 'rb').read() + expected_output = open(expected_output_filename, 'r').read() if output == expected_output: sys.stderr.write('OK\n') diff --git a/cvs2svn_rcsparse/texttools.py b/cvs2svn_rcsparse/texttools.py index a0a21568d..c7412402d 100644 --- a/cvs2svn_rcsparse/texttools.py +++ b/cvs2svn_rcsparse/texttools.py @@ -1,6 +1,6 @@ # -*-python-*- # -# Copyright (C) 1999-2014 The ViewCVS Group. All Rights Reserved. +# Copyright (C) 1999-2020 The ViewCVS Group. All Rights Reserved. # # By using this file, you agree to the terms and conditions set forth in # the LICENSE.html file which can be found at the top level of the ViewVC @@ -11,18 +11,20 @@ # ----------------------------------------------------------------------- import string +import sys # note: this will raise an ImportError if it isn't available. the rcsparse # package will recognize this and switch over to the default parser. from mx import TextTools -import common +from . 
import common +PY3 = (sys.version_info[0] >= 3) # for convenience _tt = TextTools -_idchar_list = map(chr, range(33, 127)) + map(chr, range(160, 256)) +_idchar_list = list(map(chr, list(range(33, 127)))) + list(map(chr, list(range(160, 256)))) _idchar_list.remove('$') _idchar_list.remove(',') #_idchar_list.remove('.') # leave as part of 'num' symbol @@ -136,7 +138,7 @@ def _parse_chunk(self, buf, start=0): # \ 4 6 12 14 / # \_______/_____/ \ / / # \ 13 / - # \__________________________________________/ + # \__________________________________________/ # # #1: Skip over any whitespace. # #2: If now EOF, exit with code _E_COMPLETE. @@ -317,7 +319,10 @@ def get(self): # _get = get # def get(self): token = self._get() - print 'T:', `token` + if PY3: + print('T:', repr(token.decode('ascii', 'surrogateescape'))) + else: + print('T:', repr(token)) return token def match(self, match): @@ -338,7 +343,7 @@ def mget(self, count): action = self._parse_more() if action == _EOF: ### fix this - raise RuntimeError, 'EOF hit while expecting tokens' + raise RuntimeError('EOF hit while expecting tokens') result = self.tokens[-count:] del self.tokens[-count:] return result