# (Be in -*- python -*- mode.) # # ==================================================================== # Copyright (c) 2007-2009 CollabNet. All rights reserved. # # This software is licensed as described in the file COPYING, which # you should have received as part of this distribution. The terms # are also available at http://subversion.tigris.org/license-1.html. # If newer versions of this license are posted there, you may use a # newer version instead, at your option. # # This software consists of voluntary contributions made by many # individuals. For exact contribution history, see the revision # history and logs, available at http://cvs2svn.tigris.org/. # ==================================================================== """Classes for outputting the converted repository to git. For information about the format allowed by git-fast-import, see: http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html """ import bisect import time from cvs2svn_lib.common import InternalError from cvs2svn_lib.log import logger from cvs2svn_lib.context import Ctx from cvs2svn_lib.symbol import Trunk from cvs2svn_lib.symbol import Branch from cvs2svn_lib.symbol import Tag from cvs2svn_lib.cvs_item import CVSSymbol from cvs2svn_lib.dvcs_common import DVCSOutputOption from cvs2svn_lib.dvcs_common import MirrorUpdater from cvs2svn_lib.key_generator import KeyGenerator class GitRevisionWriter(MirrorUpdater): def start(self, mirror, f): super(GitRevisionWriter, self).start(mirror) self.f = f def _modify_file(self, cvs_item, post_commit): raise NotImplementedError() def add_file(self, cvs_rev, post_commit): super(GitRevisionWriter, self).add_file(cvs_rev, post_commit) self._modify_file(cvs_rev, post_commit) def modify_file(self, cvs_rev, post_commit): super(GitRevisionWriter, self).modify_file(cvs_rev, post_commit) self._modify_file(cvs_rev, post_commit) def delete_file(self, cvs_rev, post_commit): super(GitRevisionWriter, self).delete_file(cvs_rev, post_commit) self.f.write('D %s\n' % (cvs_rev.cvs_file.cvs_path,)) def branch_file(self, cvs_symbol): super(GitRevisionWriter, self).branch_file(cvs_symbol) self._modify_file(cvs_symbol, post_commit=False) def finish(self): super(GitRevisionWriter, self).finish() del self.f class GitRevisionMarkWriter(GitRevisionWriter): def _modify_file(self, cvs_item, post_commit): if cvs_item.cvs_file.executable: mode = '100755' else: mode = '100644' self.f.write( 'M %s :%d %s\n' % (mode, cvs_item.revision_reader_token, cvs_item.cvs_file.cvs_path,) ) class GitRevisionInlineWriter(GitRevisionWriter): def __init__(self, revision_reader): self.revision_reader = revision_reader def register_artifacts(self, which_pass): GitRevisionWriter.register_artifacts(self, which_pass) self.revision_reader.register_artifacts(which_pass) def start(self, mirror, f): GitRevisionWriter.start(self, mirror, f) self.revision_reader.start() def _modify_file(self, cvs_item, post_commit): if cvs_item.cvs_file.executable: mode = '100755' else: mode = '100644' self.f.write( 'M %s inline %s\n' % (mode, cvs_item.cvs_file.cvs_path,) ) if isinstance(cvs_item, CVSSymbol): cvs_rev = cvs_item.get_cvs_revision_source(Ctx()._cvs_items_db) else: cvs_rev = cvs_item # FIXME: We have to decide what to do about keyword substitution # and eol_style here: fulltext = self.revision_reader.get_content(cvs_rev) self.f.write('data %d\n' % (len(fulltext),)) self.f.write(fulltext) self.f.write('\n') def finish(self): GitRevisionWriter.finish(self) self.revision_reader.finish() class GitOutputOption(DVCSOutputOption): """An OutputOption that outputs to a git-fast-import formatted file. Members: dump_filename -- (string) the name of the file to which the git-fast-import commands for defining revisions will be written. author_transforms -- a map from CVS author names to git full name and email address. See DVCSOutputOption.normalize_author_transforms() for information about the form of this parameter. """ name = "Git" # The first mark number used for git-fast-import commit marks. This # value needs to be large to avoid conflicts with blob marks. _first_commit_mark = 1000000000 def __init__( self, dump_filename, revision_writer, author_transforms=None, tie_tag_fixup_branches=False, ): """Constructor. DUMP_FILENAME is the name of the file to which the git-fast-import commands for defining revisions should be written. (Please note that depending on the style of revision writer, the actual file contents might not be written to this file.) REVISION_WRITER is a GitRevisionWriter that is used to output either the content of revisions or a mark that was previously used to label a blob. AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from CVS author names to git full name and email address. All of the contents should either be Unicode strings or 8-bit strings encoded as UTF-8. TIE_TAG_FIXUP_BRANCHES means whether after finishing with a tag fixup branch, it should be psuedo-merged (ancestry linked but no content changes) back into its source branch, to dispose of the open head. """ DVCSOutputOption.__init__(self) self.dump_filename = dump_filename self.revision_writer = revision_writer self.author_transforms = self.normalize_author_transforms( author_transforms ) self.tie_tag_fixup_branches = tie_tag_fixup_branches self._mark_generator = KeyGenerator(GitOutputOption._first_commit_mark) def register_artifacts(self, which_pass): DVCSOutputOption.register_artifacts(self, which_pass) self.revision_writer.register_artifacts(which_pass) def check_symbols(self, symbol_map): # FIXME: What constraints does git impose on symbols? pass def setup(self, svn_rev_count): DVCSOutputOption.setup(self, svn_rev_count) self.f = open(self.dump_filename, 'wb') # The youngest revnum that has been committed so far: self._youngest = 0 # A map {lod : [(revnum, mark)]} giving each of the revision # numbers in which there was a commit to lod, and the mark active # at the end of the revnum. self._marks = {} self.revision_writer.start(self._mirror, self.f) def _create_commit_mark(self, lod, revnum): mark = self._mark_generator.gen_id() self._set_lod_mark(lod, revnum, mark) return mark def _set_lod_mark(self, lod, revnum, mark): """Record MARK as the status of LOD for REVNUM. If there is already an entry for REVNUM, overwrite it. If not, append a new entry to the self._marks list for LOD.""" assert revnum >= self._youngest entry = (revnum, mark) try: modifications = self._marks[lod] except KeyError: # This LOD hasn't appeared before; create a new list and add the # entry: self._marks[lod] = [entry] else: # A record exists, so it necessarily has at least one element: if modifications[-1][0] == revnum: modifications[-1] = entry else: modifications.append(entry) self._youngest = revnum def _get_author(self, svn_commit): """Return the author to be used for SVN_COMMIT. Return the author as a UTF-8 string in the form needed by git fast-import; that is, 'name '.""" cvs_author = svn_commit.get_author() return self._map_author(cvs_author) def _map_author(self, cvs_author): return self.author_transforms.get(cvs_author, "%s <>" % (cvs_author,)) @staticmethod def _get_log_msg(svn_commit): return svn_commit.get_log_msg() def process_initial_project_commit(self, svn_commit): self._mirror.start_commit(svn_commit.revnum) self._mirror.end_commit() def process_primary_commit(self, svn_commit): author = self._get_author(svn_commit) log_msg = self._get_log_msg(svn_commit) lods = set() for cvs_rev in svn_commit.get_cvs_items(): lods.add(cvs_rev.lod) if len(lods) != 1: raise InternalError('Commit affects %d LODs' % (len(lods),)) lod = lods.pop() self._mirror.start_commit(svn_commit.revnum) if isinstance(lod, Trunk): # FIXME: is this correct?: self.f.write('commit refs/heads/master\n') else: self.f.write('commit refs/heads/%s\n' % (lod.name,)) self.f.write( 'mark :%d\n' % (self._create_commit_mark(lod, svn_commit.revnum),) ) self.f.write( 'committer %s %d +0000\n' % (author, svn_commit.date,) ) self.f.write('data %d\n' % (len(log_msg),)) self.f.write('%s\n' % (log_msg,)) for cvs_rev in svn_commit.get_cvs_items(): self.revision_writer.process_revision(cvs_rev, post_commit=False) self.f.write('\n') self._mirror.end_commit() def process_post_commit(self, svn_commit): author = self._get_author(svn_commit) log_msg = self._get_log_msg(svn_commit) source_lods = set() for cvs_rev in svn_commit.cvs_revs: source_lods.add(cvs_rev.lod) if len(source_lods) != 1: raise InternalError('Commit is from %d LODs' % (len(source_lods),)) source_lod = source_lods.pop() self._mirror.start_commit(svn_commit.revnum) # FIXME: is this correct?: self.f.write('commit refs/heads/master\n') self.f.write( 'mark :%d\n' % (self._create_commit_mark(None, svn_commit.revnum),) ) self.f.write( 'committer %s %d +0000\n' % (author, svn_commit.date,) ) self.f.write('data %d\n' % (len(log_msg),)) self.f.write('%s\n' % (log_msg,)) self.f.write( 'merge :%d\n' % (self._get_source_mark(source_lod, svn_commit.revnum),) ) for cvs_rev in svn_commit.cvs_revs: self.revision_writer.process_revision(cvs_rev, post_commit=True) self.f.write('\n') self._mirror.end_commit() def _get_source_mark(self, source_lod, revnum): """Return the mark active on SOURCE_LOD at the end of REVNUM.""" modifications = self._marks[source_lod] i = bisect.bisect_left(modifications, (revnum + 1,)) - 1 (revnum, mark) = modifications[i] return mark def describe_lod_to_user(self, lod): """This needs to make sense to users of the fastimported result.""" if isinstance(lod, Trunk): return 'master' else: return lod.name def _describe_commit(self, svn_commit, lod): author = self._map_author(svn_commit.get_author()) if author.endswith(" <>"): author = author[:-3] date = time.strftime( "%Y-%m-%d %H:%M:%S UTC", time.gmtime(svn_commit.date) ) log_msg = svn_commit.get_log_msg() if log_msg.find('\n') != -1: log_msg = log_msg[:log_msg.index('\n')] return "%s %s %s '%s'" % ( self.describe_lod_to_user(lod), date, author, log_msg,) def _process_symbol_commit(self, svn_commit, git_branch, source_groups): author = self._get_author(svn_commit) log_msg = self._get_log_msg(svn_commit) # There are two distinct cases we need to care for here: # 1. initial creation of a LOD # 2. fixup of an existing LOD to include more files, because the LOD in # CVS was created piecemeal over time, with intervening commits # We look at _marks here, but self._mirror._get_lod_history(lod).exists() # might be technically more correct (though _get_lod_history is currently # underscore-private) is_initial_lod_creation = svn_commit.symbol not in self._marks # Create the mark, only after the check above mark = self._create_commit_mark(svn_commit.symbol, svn_commit.revnum) if is_initial_lod_creation: # Get the primary parent p_source_revnum, p_source_lod, p_cvs_symbols = source_groups[0] try: p_source_node = self._mirror.get_old_lod_directory( p_source_lod, p_source_revnum ) except KeyError: raise InternalError('Source %r does not exist' % (p_source_lod,)) cvs_files_to_delete = set(self._get_all_files(p_source_node)) for (source_revnum, source_lod, cvs_symbols,) in source_groups: for cvs_symbol in cvs_symbols: cvs_files_to_delete.discard(cvs_symbol.cvs_file) # Write a trailer to the log message which describes the cherrypicks that # make up this symbol creation. log_msg += "\n" if is_initial_lod_creation: log_msg += "\nSprout from %s" % ( self._describe_commit( Ctx()._persistence_manager.get_svn_commit(p_source_revnum), p_source_lod ), ) for (source_revnum, source_lod, cvs_symbols,) \ in source_groups[(is_initial_lod_creation and 1 or 0):]: log_msg += "\nCherrypick from %s:" % ( self._describe_commit( Ctx()._persistence_manager.get_svn_commit(source_revnum), source_lod ), ) for cvs_path in sorted( cvs_symbol.cvs_file.cvs_path for cvs_symbol in cvs_symbols ): log_msg += "\n %s" % (cvs_path,) if is_initial_lod_creation: if cvs_files_to_delete: log_msg += "\nDelete:" for cvs_path in sorted( cvs_file.cvs_path for cvs_file in cvs_files_to_delete ): log_msg += "\n %s" % (cvs_path,) self.f.write('commit %s\n' % (git_branch,)) self.f.write('mark :%d\n' % (mark,)) self.f.write('committer %s %d +0000\n' % (author, svn_commit.date,)) self.f.write('data %d\n' % (len(log_msg),)) self.f.write('%s\n' % (log_msg,)) # Only record actual DVCS ancestry for the primary sprout parent, # all the rest are effectively cherrypicks. if is_initial_lod_creation: self.f.write( 'from :%d\n' % (self._get_source_mark(p_source_lod, p_source_revnum),) ) for (source_revnum, source_lod, cvs_symbols,) in source_groups: for cvs_symbol in cvs_symbols: self.revision_writer.branch_file(cvs_symbol) if is_initial_lod_creation: for cvs_file in cvs_files_to_delete: self.f.write('D %s\n' % (cvs_file.cvs_path,)) self.f.write('\n') return mark def process_branch_commit(self, svn_commit): self._mirror.start_commit(svn_commit.revnum) source_groups = self._get_source_groups(svn_commit) if self._is_simple_copy(svn_commit, source_groups): (source_revnum, source_lod, cvs_symbols) = source_groups[0] logger.debug( '%s will be created via a simple copy from %s:r%d' % (svn_commit.symbol, source_lod, source_revnum,) ) mark = self._get_source_mark(source_lod, source_revnum) self._set_symbol(svn_commit.symbol, mark) self._mirror.copy_lod(source_lod, svn_commit.symbol, source_revnum) self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark) else: logger.debug( '%s will be created via fixup commit(s)' % (svn_commit.symbol,) ) self._process_symbol_commit( svn_commit, 'refs/heads/%s' % (svn_commit.symbol.name,), source_groups, ) self._mirror.end_commit() def _set_symbol(self, symbol, mark): if isinstance(symbol, Branch): category = 'heads' elif isinstance(symbol, Tag): category = 'tags' else: raise InternalError() self.f.write('reset refs/%s/%s\n' % (category, symbol.name,)) self.f.write('from :%d\n' % (mark,)) def get_tag_fixup_branch_name(self, svn_commit): # The branch name to use for the "tag fixup branches". The # git-fast-import documentation suggests using 'TAG_FIXUP' # (outside of the refs/heads namespace), but this is currently # broken. Use a name containing '.', which is not allowed in CVS # symbols, to avoid conflicts (though of course a conflict could # still result if the user requests symbol transformations). return 'refs/heads/TAG.FIXUP' def process_tag_commit(self, svn_commit): # FIXME: For now we create a fixup branch with the same name as # the tag, then the tag. We never delete the fixup branch. self._mirror.start_commit(svn_commit.revnum) source_groups = self._get_source_groups(svn_commit) if self._is_simple_copy(svn_commit, source_groups): (source_revnum, source_lod, cvs_symbols) = source_groups[0] logger.debug( '%s will be created via a simple copy from %s:r%d' % (svn_commit.symbol, source_lod, source_revnum,) ) mark = self._get_source_mark(source_lod, source_revnum) self._set_symbol(svn_commit.symbol, mark) self._mirror.copy_lod(source_lod, svn_commit.symbol, source_revnum) self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark) else: logger.debug( '%s will be created via a fixup branch' % (svn_commit.symbol,) ) fixup_branch_name = self.get_tag_fixup_branch_name(svn_commit) # Create the fixup branch (which might involve making more than # one commit): mark = self._process_symbol_commit( svn_commit, fixup_branch_name, source_groups ) # Store the mark of the last commit to the fixup branch as the # value of the tag: self._set_symbol(svn_commit.symbol, mark) self.f.write('reset %s\n' % (fixup_branch_name,)) self.f.write('\n') if self.tie_tag_fixup_branches: source_lod = source_groups[0][1] source_lod_git_branch = \ 'refs/heads/%s' % (getattr(source_lod, 'name', 'master'),) mark2 = self._create_commit_mark(source_lod, svn_commit.revnum) author = self._map_author(Ctx().username) log_msg = self._get_log_msg_for_ancestry_tie(svn_commit) self.f.write('commit %s\n' % (source_lod_git_branch,)) self.f.write('mark :%d\n' % (mark2,)) self.f.write('committer %s %d +0000\n' % (author, svn_commit.date,)) self.f.write('data %d\n' % (len(log_msg),)) self.f.write('%s\n' % (log_msg,)) self.f.write( 'merge :%d\n' % (mark,) ) self.f.write('\n') self._mirror.end_commit() def _get_log_msg_for_ancestry_tie(self, svn_commit): return Ctx().text_wrapper.fill( Ctx().tie_tag_ancestry_message % { 'symbol_name' : svn_commit.symbol.name, } ) def cleanup(self): DVCSOutputOption.cleanup(self) self.revision_writer.finish() self.f.close() del self.f