# (Be in -*- python -*- mode.)
#
# ====================================================================
# Copyright (c) 2000-2007 CollabNet.  All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution.  The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals.  For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================

"""This module defines the passes that make up a conversion."""


import sys
import os
import shutil
import cPickle

from cvs2svn_lib import config
from cvs2svn_lib.context import Ctx
from cvs2svn_lib.common import warning_prefix
from cvs2svn_lib.common import FatalException
from cvs2svn_lib.common import FatalError
from cvs2svn_lib.common import InternalError
from cvs2svn_lib.common import DB_OPEN_NEW
from cvs2svn_lib.common import DB_OPEN_READ
from cvs2svn_lib.common import DB_OPEN_WRITE
from cvs2svn_lib.common import Timestamper
from cvs2svn_lib.log import Log
from cvs2svn_lib.pass_manager import Pass
from cvs2svn_lib.serializer import PrimedPickleSerializer
from cvs2svn_lib.artifact_manager import artifact_manager
from cvs2svn_lib.cvs_file_database import CVSFileDatabase
from cvs2svn_lib.metadata_database import MetadataDatabase
from cvs2svn_lib.project import read_projects
from cvs2svn_lib.project import write_projects
from cvs2svn_lib.symbol import LineOfDevelopment
from cvs2svn_lib.symbol import Trunk
from cvs2svn_lib.symbol import Symbol
from cvs2svn_lib.symbol import Branch
from cvs2svn_lib.symbol import Tag
from cvs2svn_lib.symbol import ExcludedSymbol
from cvs2svn_lib.symbol_database import SymbolDatabase
from cvs2svn_lib.symbol_database import create_symbol_database
from cvs2svn_lib.symbol_statistics import SymbolPlanError
from cvs2svn_lib.symbol_statistics import IndeterminateSymbolException
from cvs2svn_lib.symbol_statistics import SymbolStatistics
from cvs2svn_lib.cvs_item import CVSRevision
from cvs2svn_lib.cvs_item import CVSSymbol
from cvs2svn_lib.cvs_item_database import OldCVSItemStore
from cvs2svn_lib.cvs_item_database import IndexedCVSItemStore
from cvs2svn_lib.cvs_item_database import cvs_item_primer
from cvs2svn_lib.cvs_item_database import NewSortableCVSRevisionDatabase
from cvs2svn_lib.cvs_item_database import OldSortableCVSRevisionDatabase
from cvs2svn_lib.cvs_item_database import NewSortableCVSSymbolDatabase
from cvs2svn_lib.cvs_item_database import OldSortableCVSSymbolDatabase
from cvs2svn_lib.key_generator import KeyGenerator
from cvs2svn_lib.changeset import RevisionChangeset
from cvs2svn_lib.changeset import OrderedChangeset
from cvs2svn_lib.changeset import SymbolChangeset
from cvs2svn_lib.changeset import BranchChangeset
from cvs2svn_lib.changeset import create_symbol_changeset
from cvs2svn_lib.changeset_graph import ChangesetGraph
from cvs2svn_lib.changeset_graph_link import ChangesetGraphLink
from cvs2svn_lib.changeset_database import ChangesetDatabase
from cvs2svn_lib.changeset_database import CVSItemToChangesetTable
from cvs2svn_lib.svn_commit import SVNRevisionCommit
from cvs2svn_lib.openings_closings import SymbolingsLogger
from cvs2svn_lib.svn_commit_creator import SVNCommitCreator
from cvs2svn_lib.persistence_manager import PersistenceManager
from cvs2svn_lib.collect_data import CollectData
from cvs2svn_lib.process import call_command
from cvs2svn_lib.check_dependencies_pass \
    import CheckItemStoreDependenciesPass
from cvs2svn_lib.check_dependencies_pass \
    import CheckIndexedItemStoreDependenciesPass


def sort_file(infilename, outfilename, options=[]):
  """Sort file INFILENAME, storing the results to OUTFILENAME.

  OPTIONS is an optional list of strings that are passed as
  additional options to the sort command."""

  # GNU sort will sort our dates differently (incorrectly!) if our
  # LC_ALL is anything but 'C', so if LC_ALL is set, temporarily set
  # it to 'C':
  lc_all_tmp = os.environ.get('LC_ALL', None)
  os.environ['LC_ALL'] = 'C'

  # The -T option to sort has a nice side effect.  The Win32 sort is
  # case insensitive and cannot be used, and since it does not
  # understand the -T option and dies if we try to use it, there is
  # no risk that we use that sort by accident.
  command = [
      Ctx().sort_executable,
      '-T', Ctx().tmpdir
      ] + options + [
      infilename
      ]
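  # For illustration only: with GNU sort and a hypothetical tmpdir,
  # the command built above amounts to something like
  #
  #     sort -T /path/to/tmpdir [OPTIONS...] infilename
  #
  # run with LC_ALL=C so that lines are compared bytewise rather
  # than according to locale-specific collation rules.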
  try:
    call_command(command, stdout=open(outfilename, 'w'))
  finally:
    if lc_all_tmp is None:
      del os.environ['LC_ALL']
    else:
      os.environ['LC_ALL'] = lc_all_tmp

  # On some versions of Windows, os.system() does not return an
  # error if the command fails.  So add a couple of consistency
  # checks here: that the output file was created, and that it has
  # the right size:
  if not os.path.exists(outfilename):
    raise FatalError('Sort output file missing: %r' % (outfilename,))

  if os.path.getsize(outfilename) != os.path.getsize(infilename):
    raise FatalError(
        'Sort input and output file sizes differ:\n'
        '    %r (%d bytes)\n'
        '    %r (%d bytes)' % (
            infilename, os.path.getsize(infilename),
            outfilename, os.path.getsize(outfilename),
            )
        )


class CollectRevsPass(Pass):
  """This pass was formerly known as pass1."""

  def register_artifacts(self):
    self._register_temp_file(config.PROJECTS)
    self._register_temp_file(config.SYMBOL_STATISTICS)
    self._register_temp_file(config.METADATA_INDEX_TABLE)
    self._register_temp_file(config.METADATA_STORE)
    self._register_temp_file(config.CVS_FILES_DB)
    self._register_temp_file(config.CVS_ITEMS_STORE)
    Ctx().revision_recorder.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    Log().quiet("Examining all CVS ',v' files...")
    Ctx()._projects = {}
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_NEW)
    cd = CollectData(Ctx().revision_recorder, stats_keeper)
    for project in run_options.projects:
      cd.process_project(project)
    run_options.projects = None

    fatal_errors = cd.close()

    if fatal_errors:
      raise FatalException("Pass 1 complete.\n"
                           + "=" * 75 + "\n"
                           + "Error summary:\n"
                           + "\n".join(fatal_errors) + "\n"
                           + "Exited due to fatal error(s).")

    Ctx()._cvs_file_db.close()
    write_projects(artifact_manager.get_temp_file(config.PROJECTS))
    Log().quiet("Done")


class CleanMetadataPass(Pass):
  """Clean up CVS revision metadata and write it to a new database."""

  def register_artifacts(self):
    self._register_temp_file(config.METADATA_CLEAN_INDEX_TABLE)
    self._register_temp_file(config.METADATA_CLEAN_STORE)
    self._register_temp_file_needed(config.METADATA_INDEX_TABLE)
    self._register_temp_file_needed(config.METADATA_STORE)

  def _get_clean_author(self, author):
    """Return AUTHOR, converted appropriately to UTF8.

    Raise a UnicodeError if it cannot be converted using the
    configured cvs_author_decoder."""

    try:
      return self._authors[author]
    except KeyError:
      pass

    try:
      clean_author = Ctx().cvs_author_decoder(author)
    except UnicodeError:
      self._authors[author] = author
      raise UnicodeError('Problem decoding author \'%s\'' % (author,))

    try:
      clean_author = clean_author.encode('utf8')
    except UnicodeError:
      self._authors[author] = author
      raise UnicodeError('Problem encoding author \'%s\'' % (author,))

    self._authors[author] = clean_author
    return clean_author
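  # An illustrative (hypothetical) round trip through
  # _get_clean_author() above: with a latin-1 cvs_author_decoder,
  # the raw byte string 'Jos\xe9' is decoded to the unicode string
  # u'Jos\xe9' and then re-encoded as the UTF-8 byte string
  # 'Jos\xc3\xa9'.  Note that a failed conversion is also cached in
  # self._authors, so each problematic author name is only warned
  # about once.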
  def _get_clean_log_msg(self, log_msg):
    """Return LOG_MSG, converted appropriately to UTF8.

    Raise a UnicodeError if it cannot be converted using the
    configured cvs_log_decoder."""

    try:
      clean_log_msg = Ctx().cvs_log_decoder(log_msg)
    except UnicodeError:
      raise UnicodeError(
          'Problem decoding log message:\n'
          '%s\n'
          '%s\n'
          '%s'
          % ('-' * 75, log_msg, '-' * 75,)
          )

    try:
      return clean_log_msg.encode('utf8')
    except UnicodeError:
      raise UnicodeError(
          'Problem encoding log message:\n'
          '%s\n'
          '%s\n'
          '%s'
          % ('-' * 75, log_msg, '-' * 75,)
          )

  def _clean_metadata(self, metadata):
    """Clean up METADATA by overwriting its members as necessary."""

    try:
      metadata.author = self._get_clean_author(metadata.author)
    except UnicodeError, e:
      Log().warn('%s: %s' % (warning_prefix, e,))
      self.warnings = True

    try:
      metadata.log_msg = self._get_clean_log_msg(metadata.log_msg)
    except UnicodeError, e:
      Log().warn('%s: %s' % (warning_prefix, e,))
      self.warnings = True

  def run(self, run_options, stats_keeper):
    Log().quiet("Converting metadata to UTF8...")
    metadata_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_STORE),
        artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE),
        DB_OPEN_READ,
        )
    metadata_clean_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
        artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
        DB_OPEN_NEW,
        )

    self.warnings = False

    # A map {author : clean_author} for those known (to avoid
    # repeating warnings):
    self._authors = {}

    for id in metadata_db.iterkeys():
      metadata = metadata_db[id]

      # Record the original author name because it might be needed
      # for expanding CVS keywords:
      metadata.original_author = metadata.author
      self._clean_metadata(metadata)
      metadata_clean_db[id] = metadata

    if self.warnings:
      raise FatalError(
          'There were warnings converting author names and/or log messages\n'
          'to unicode (see messages above).  Please restart this pass\n'
          'with one or more \'--encoding\' parameters or with\n'
          '\'--fallback-encoding\'.'
          )

    metadata_clean_db.close()
    metadata_db.close()

    Log().quiet("Done")


class CollateSymbolsPass(Pass):
  """Divide symbols into branches, tags, and excludes."""

  conversion_names = {
      Trunk : 'trunk',
      Branch : 'branch',
      Tag : 'tag',
      ExcludedSymbol : 'exclude',
      Symbol : '.',
      }

  def register_artifacts(self):
    self._register_temp_file(config.SYMBOL_DB)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_STATISTICS)

  def get_symbol(self, run_options, stats):
    """Use StrategyRules to decide what to do with a symbol.

    STATS is an instance of symbol_statistics._Stats describing an
    instance of Symbol or Trunk.  To determine how the symbol is to
    be converted, consult the StrategyRules in the project's
    symbol_strategy_rules.  Each rule is allowed a chance to change
    the way the symbol will be converted.

    If the symbol is not a Trunk or TypedSymbol after all rules have
    run, raise IndeterminateSymbolException."""

    symbol = stats.lod
    rules = run_options.project_symbol_strategy_rules[symbol.project.id]
    for rule in rules:
      symbol = rule.get_symbol(symbol, stats)
      assert symbol is not None

    stats.check_valid(symbol)

    return symbol
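  # Note that the rules in get_symbol() above are chained: each rule
  # is handed the result of the previous one, so a later rule can
  # refine or override what an earlier rule decided.  For example
  # (hypothetically), an early rule might convert a Symbol into a
  # Branch, and a later catch-all rule would then leave that Branch
  # alone and only act on symbols that are still untyped.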
  def log_symbol_summary(self, stats, symbol):
    if not self.symbol_info_file:
      return

    if isinstance(symbol, Trunk):
      name = '.trunk.'
      preferred_parent_name = '.'
    else:
      name = stats.lod.name
      if symbol.preferred_parent_id is None:
        preferred_parent_name = '.'
      else:
        preferred_parent = self.symbol_stats[symbol.preferred_parent_id].lod
        if isinstance(preferred_parent, Trunk):
          preferred_parent_name = '.trunk.'
        else:
          preferred_parent_name = preferred_parent.name

    if isinstance(symbol, LineOfDevelopment) and symbol.base_path:
      symbol_path = symbol.base_path
    else:
      symbol_path = '.'

    self.symbol_info_file.write(
        '%-5d %-30s %-10s %s %s\n' % (
            stats.lod.project.id,
            name,
            self.conversion_names[symbol.__class__],
            symbol_path,
            preferred_parent_name,
            )
        )
    self.symbol_info_file.write('  # %s\n' % (stats,))
    parent_counts = stats.possible_parents.items()
    if parent_counts:
      self.symbol_info_file.write('  # Possible parents:\n')
      parent_counts.sort(lambda a,b: cmp((b[1], a[0]), (a[1], b[0])))
      for (pp, count) in parent_counts:
        if isinstance(pp, Trunk):
          self.symbol_info_file.write(
              '  #     .trunk. : %d\n' % (count,)
              )
        else:
          self.symbol_info_file.write(
              '  #     %s : %d\n' % (pp.name, count,)
              )
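  # For illustration, with made-up data the symbol-info file written
  # by log_symbol_summary() above might contain:
  #
  #     1     RELEASE_1_0                    tag        . .trunk.
  #       # <statistics for the symbol>
  #       # Possible parents:
  #       #     .trunk. : 3
  #
  # i.e. project id, symbol name, chosen conversion, symbol path,
  # and preferred parent, followed by commented statistics.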
  def get_symbols(self, run_options):
    """Return a map telling how to convert symbols.

    The return value is a map {AbstractSymbol : (Trunk|TypedSymbol)},
    indicating how each symbol should be converted.  Trunk objects in
    SYMBOL_STATS are passed through unchanged.  One object is
    included in the return value for each line of development
    described in SYMBOL_STATS.

    Raise FatalError if there was an error."""

    errors = []
    mismatches = []

    if Ctx().symbol_info_filename is not None:
      self.symbol_info_file = open(Ctx().symbol_info_filename, 'w')
      self.symbol_info_file.write(
          '# Columns: project_id symbol_name conversion symbol_path '
          'preferred_parent_name\n'
          )
    else:
      self.symbol_info_file = None

    # Initialize each symbol strategy rule a single time, even if it
    # is used in more than one project.  First define a map from
    # object id to symbol strategy rule:
    rules = {}
    for rule_list in run_options.project_symbol_strategy_rules:
      for rule in rule_list:
        rules[id(rule)] = rule

    for rule in rules.itervalues():
      rule.start(self.symbol_stats)

    retval = {}

    for stats in self.symbol_stats:
      try:
        symbol = self.get_symbol(run_options, stats)
      except IndeterminateSymbolException, e:
        self.log_symbol_summary(stats, stats.lod)
        mismatches.append(e.stats)
      except SymbolPlanError, e:
        self.log_symbol_summary(stats, stats.lod)
        errors.append(e)
      else:
        self.log_symbol_summary(stats, symbol)
        retval[stats.lod] = symbol

    for rule in rules.itervalues():
      rule.finish()

    if self.symbol_info_file:
      self.symbol_info_file.close()
      del self.symbol_info_file

    if errors or mismatches:
      s = ['Problems determining how symbols should be converted:\n']
      for e in errors:
        s.append('%s\n' % (e,))
      if mismatches:
        s.append(
            'It is not clear how the following symbols '
            'should be converted.\n'
            'Use --symbol-hints, --force-tag, --force-branch, --exclude, '
            'and/or\n'
            '--symbol-default to resolve the ambiguity.\n'
            )
        for stats in mismatches:
          s.append('    %s\n' % (stats,))
      raise FatalError(''.join(s))
    else:
      return retval

  def run(self, run_options, stats_keeper):
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    self.symbol_stats = SymbolStatistics(
        artifact_manager.get_temp_file(config.SYMBOL_STATISTICS)
        )

    symbol_map = self.get_symbols(run_options)

    # Check the symbols for consistency and bail out if there were
    # errors:
    self.symbol_stats.check_consistency(symbol_map)

    # Check that the symbols all have SVN paths set and that the
    # paths are disjoint:
    Ctx().output_option.check_symbols(symbol_map)

    for symbol in symbol_map.itervalues():
      if isinstance(symbol, ExcludedSymbol):
        self.symbol_stats.exclude_symbol(symbol)

    create_symbol_database(symbol_map.values())

    del self.symbol_stats

    Log().quiet("Done")


class FilterSymbolsPass(Pass):
  """Delete any branches/tags that are to be excluded.

  Also delete revisions on excluded branches, and delete other
  references to the excluded symbols."""

  def register_artifacts(self):
    self._register_temp_file(config.SUMMARY_SERIALIZER)
    self._register_temp_file(config.CVS_REVS_SUMMARY_DATAFILE)
    self._register_temp_file(config.CVS_SYMBOLS_SUMMARY_DATAFILE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_STORE)
    Ctx().revision_excluder.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()

    cvs_item_store = OldCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_STORE))

    cvs_item_serializer = PrimedPickleSerializer(cvs_item_primer)
    f = open(artifact_manager.get_temp_file(config.SUMMARY_SERIALIZER), 'wb')
    cPickle.dump(cvs_item_serializer, f, -1)
    f.close()

    rev_db = NewSortableCVSRevisionDatabase(
        artifact_manager.get_temp_file(config.CVS_REVS_SUMMARY_DATAFILE),
        cvs_item_serializer,
        )

    symbol_db = NewSortableCVSSymbolDatabase(
        artifact_manager.get_temp_file(config.CVS_SYMBOLS_SUMMARY_DATAFILE),
        cvs_item_serializer,
        )

    revision_excluder = Ctx().revision_excluder

    Log().quiet("Filtering out excluded symbols and summarizing items...")

    stats_keeper.reset_cvs_rev_info()
    revision_excluder.start()

    # Process the cvs items store one file at a time:
    for cvs_file_items in cvs_item_store.iter_cvs_file_items():
      Log().verbose(cvs_file_items.cvs_file.filename)
      cvs_file_items.filter_excluded_symbols(revision_excluder)
      cvs_file_items.mutate_symbols()
      cvs_file_items.adjust_parents()
      cvs_file_items.refine_symbols()
      cvs_file_items.record_opened_symbols()
      cvs_file_items.record_closed_symbols()
      cvs_file_items.check_link_consistency()

      # Store whatever is left to the new file and update statistics:
      stats_keeper.record_cvs_file(cvs_file_items.cvs_file)
      for cvs_item in cvs_file_items.values():
        stats_keeper.record_cvs_item(cvs_item)

        if isinstance(cvs_item, CVSRevision):
          rev_db.add(cvs_item)
        elif isinstance(cvs_item, CVSSymbol):
          symbol_db.add(cvs_item)

    stats_keeper.set_stats_reflect_exclude(True)

    rev_db.close()
    symbol_db.close()
    revision_excluder.finish()
    cvs_item_store.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")


class SortRevisionSummaryPass(Pass):
  """Sort the revision summary file."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_REVS_SUMMARY_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_REVS_SUMMARY_DATAFILE)

  def run(self, run_options, stats_keeper):
    Log().quiet("Sorting CVS revision summaries...")
    sort_file(
        artifact_manager.get_temp_file(config.CVS_REVS_SUMMARY_DATAFILE),
        artifact_manager.get_temp_file(
            config.CVS_REVS_SUMMARY_SORTED_DATAFILE))
    Log().quiet("Done")


class SortSymbolSummaryPass(Pass):
  """Sort the symbol summary file."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_SYMBOLS_SUMMARY_DATAFILE)

  def run(self, run_options, stats_keeper):
    Log().quiet("Sorting CVS symbol summaries...")
    sort_file(
        artifact_manager.get_temp_file(config.CVS_SYMBOLS_SUMMARY_DATAFILE),
        artifact_manager.get_temp_file(
            config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE))
    Log().quiet("Done")

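# Note: the two sort passes above are what make the next pass cheap.
# The summary records are evidently serialized so that a plain byte
# sort leaves revisions grouped by metadata id in timestamp order,
# and symbols grouped by symbol id; the generators in
# InitializeChangesetsPass below can therefore form preliminary
# changesets in a single sequential sweep over each file instead of
# holding everything in memory.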

class InitializeChangesetsPass(Pass):
  """Create preliminary CommitSets."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET)
    self._register_temp_file(config.CHANGESETS_STORE)
    self._register_temp_file(config.CHANGESETS_INDEX)
    self._register_temp_file(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.SUMMARY_SERIALIZER)
    self._register_temp_file_needed(config.CVS_REVS_SUMMARY_SORTED_DATAFILE)
    self._register_temp_file_needed(
        config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE)

  def get_revision_changesets(self):
    """Generate revision changesets, one at a time.

    Each time, yield a list of CVSRevisions that might potentially
    constitute a changeset."""

    # Create changesets for CVSRevisions:
    old_metadata_id = None
    old_timestamp = None
    changeset_items = []

    db = OldSortableCVSRevisionDatabase(
        artifact_manager.get_temp_file(
            config.CVS_REVS_SUMMARY_SORTED_DATAFILE
            ),
        self.cvs_item_serializer,
        )

    for cvs_rev in db:
      if cvs_rev.metadata_id != old_metadata_id \
         or cvs_rev.timestamp > old_timestamp + config.COMMIT_THRESHOLD:
        # Start a new changeset.  First finish up the old changeset,
        # if any:
        if changeset_items:
          yield changeset_items
          changeset_items = []
        old_metadata_id = cvs_rev.metadata_id
      changeset_items.append(cvs_rev)
      old_timestamp = cvs_rev.timestamp

    # Finish up the last changeset, if any:
    if changeset_items:
      yield changeset_items

  def get_symbol_changesets(self):
    """Generate symbol changesets, one at a time.

    Each time, yield a list of CVSSymbols that might potentially
    constitute a changeset."""

    old_symbol_id = None
    changeset_items = []

    db = OldSortableCVSSymbolDatabase(
        artifact_manager.get_temp_file(
            config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE
            ),
        self.cvs_item_serializer,
        )

    for cvs_symbol in db:
      if cvs_symbol.symbol.id != old_symbol_id:
        # Start a new changeset.  First finish up the old changeset,
        # if any:
        if changeset_items:
          yield changeset_items
          changeset_items = []
        old_symbol_id = cvs_symbol.symbol.id
      changeset_items.append(cvs_symbol)

    # Finish up the last changeset, if any:
    if changeset_items:
      yield changeset_items

  @staticmethod
  def compare_items(a, b):
    return (
        cmp(a.timestamp, b.timestamp)
        or cmp(a.cvs_file.cvs_path, b.cvs_file.cvs_path)
        or cmp([int(x) for x in a.rev.split('.')],
               [int(x) for x in b.rev.split('.')])
        or cmp(a.id, b.id))

  def break_internal_dependencies(self, changeset_items):
    """Split up CHANGESET_ITEMS if necessary to break internal dependencies.

    CHANGESET_ITEMS is a list of CVSRevisions that could possibly
    belong in a single RevisionChangeset, but there might be internal
    dependencies among the items.  Return a list of lists, where each
    sublist is a list of CVSRevisions and at least one internal
    dependency has been eliminated.  Iff CHANGESET_ITEMS does not
    have to be split, then the return value will contain a single
    value, namely the original value of CHANGESET_ITEMS.  Split
    CHANGESET_ITEMS at most once, even though the resulting
    changesets might themselves have internal dependencies."""

    # We only look for succ dependencies, since by doing so we
    # automatically cover pred dependencies as well.  First create a
    # list of tuples (pred, succ) of id pairs for CVSItems that
    # depend on each other.
    dependencies = []

    changeset_cvs_item_ids = set([cvs_rev.id for cvs_rev in changeset_items])
    for cvs_item in changeset_items:
      for next_id in cvs_item.get_succ_ids():
        if next_id in changeset_cvs_item_ids:
          # Sanity check: a CVSItem should never depend on itself:
          if next_id == cvs_item.id:
            raise InternalError('Item depends on itself: %s' % (cvs_item,))

          dependencies.append((cvs_item.id, next_id,))

    if dependencies:
      # Sort the changeset_items in a defined order (chronological to
      # the extent that the timestamps are correct and unique).
      changeset_items.sort(self.compare_items)
      indexes = {}
      for i in range(len(changeset_items)):
        indexes[changeset_items[i].id] = i

      # How many internal dependencies would be broken by breaking
      # the Changeset after a particular index?
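      # A worked example with made-up items: if the sorted items are
      # [a, b, c, d] with dependencies a->c and b->c, the +1/-1
      # difference updates below produce [1, 1, -2, 0], and the
      # running-sum loop then turns that into [1, 2, 0, 0].  After
      # that, breaks[i] is exactly the number of dependencies that
      # would be severed by splitting between items i and i+1 (so
      # the best split in this example is after index 1); ties are
      # broken in favor of the smallest timestamp gap.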
      breaks = [0] * len(changeset_items)
      for (pred, succ,) in dependencies:
        pred_index = indexes[pred]
        succ_index = indexes[succ]
        breaks[min(pred_index, succ_index)] += 1
        breaks[max(pred_index, succ_index)] -= 1

      best_i = None
      best_count = -1
      best_time = 0
      for i in range(1, len(breaks)):
        breaks[i] += breaks[i - 1]
      for i in range(0, len(breaks) - 1):
        if breaks[i] > best_count:
          best_i = i
          best_count = breaks[i]
          best_time = (changeset_items[i + 1].timestamp
                       - changeset_items[i].timestamp)
        elif breaks[i] == best_count \
             and (changeset_items[i + 1].timestamp
                  - changeset_items[i].timestamp) < best_time:
          best_i = i
          best_count = breaks[i]
          best_time = (changeset_items[i + 1].timestamp
                       - changeset_items[i].timestamp)

      # Reuse the old changeset.id for the first of the split
      # changesets.
      return [changeset_items[:best_i + 1], changeset_items[best_i + 1:]]
    else:
      return [changeset_items]

  def break_all_internal_dependencies(self, changeset_items):
    """Keep breaking CHANGESET_ITEMS up to break all internal dependencies.

    CHANGESET_ITEMS is a list of CVSRevisions that could conceivably
    be part of a single changeset.  Break this list into sublists,
    where the CVSRevisions in each sublist are free of mutual
    dependencies."""

    # This method is written non-recursively to avoid any possible
    # problems with recursion depth.

    changesets_to_split = [changeset_items]
    while changesets_to_split:
      changesets = self.break_internal_dependencies(
          changesets_to_split.pop())
      if len(changesets) == 1:
        [changeset_items] = changesets
        yield changeset_items
      else:
        # The changeset had to be split; see if either of the
        # fragments have to be split:
        changesets.reverse()
        changesets_to_split.extend(changesets)
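  # (A note on ordering in break_all_internal_dependencies() above:
  # because the fragments are reversed before being pushed onto the
  # changesets_to_split stack, the chronologically earliest fragment
  # is always popped and examined first, so the fully-split sublists
  # are yielded in their original order.)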
  def get_changesets(self):
    """Generate (Changeset, [CVSItem,...]) for all changesets.

    The Changesets already have their internal dependencies broken.
    The [CVSItem,...] list is the list of CVSItems in the
    corresponding Changeset."""

    for changeset_items in self.get_revision_changesets():
      for split_changeset_items \
              in self.break_all_internal_dependencies(changeset_items):
        yield (
            RevisionChangeset(
                self.changeset_key_generator.gen_id(),
                [cvs_rev.id for cvs_rev in split_changeset_items]
                ),
            split_changeset_items,
            )

    for changeset_items in self.get_symbol_changesets():
      yield (
          create_symbol_changeset(
              self.changeset_key_generator.gen_id(),
              changeset_items[0].symbol,
              [cvs_symbol.id for cvs_symbol in changeset_items]
              ),
          changeset_items,
          )

  def run(self, run_options, stats_keeper):
    Log().quiet("Creating preliminary commit sets...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()

    f = open(artifact_manager.get_temp_file(config.SUMMARY_SERIALIZER), 'rb')
    self.cvs_item_serializer = cPickle.load(f)
    f.close()

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
        DB_OPEN_NEW,
        )
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(config.CVS_ITEM_TO_CHANGESET),
        DB_OPEN_NEW,
        )

    self.sorted_cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_NEW)

    self.changeset_key_generator = KeyGenerator()

    for (changeset, changeset_items) in self.get_changesets():
      if Log().is_on(Log.DEBUG):
        Log().debug(repr(changeset))
      changeset_db.store(changeset)
      for cvs_item in changeset_items:
        self.sorted_cvs_items_db.add(cvs_item)
        cvs_item_to_changeset_id[cvs_item.id] = changeset.id

    self.sorted_cvs_items_db.close()
    cvs_item_to_changeset_id.close()
    changeset_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    del self.cvs_item_serializer

    Log().quiet("Done")


class ProcessedChangesetLogger:
  def __init__(self):
    self.processed_changeset_ids = []

  def log(self, changeset_id):
    if Log().is_on(Log.DEBUG):
      self.processed_changeset_ids.append(changeset_id)

  def flush(self):
    if self.processed_changeset_ids:
      Log().debug(
          'Consumed changeset ids %s'
          % (', '.join(['%x' % id
                        for id in self.processed_changeset_ids]),))

      del self.processed_changeset_ids[:]


class BreakRevisionChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only RevisionChangesets."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_REVBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_REVBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_STORE)
    self._register_temp_file_needed(config.CHANGESETS_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET)

  def get_source_changesets(self):
    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()
    del old_changeset_db
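  # How break_cycle() below picks its victim: every changeset in the
  # cycle is wrapped, together with its two neighbors, in a
  # ChangesetGraphLink, and the link that compares smallest (the
  # ordering is defined by ChangesetGraphLink itself) is taken to be
  # the most promising place to split.  Its middle changeset is then
  # replaced by the fragments that break_changeset() returns.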
  def break_cycle(self, cycle):
    """Break up one or more changesets in CYCLE to help break the cycle.

    CYCLE is a list of Changesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle.  Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    self.processed_changeset_logger.flush()
    best_i = None
    best_link = None
    for i in range(len(cycle)):
      # It's OK if this index wraps to -1:
      link = ChangesetGraphLink(
          cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking cycle %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def run(self, run_options, stats_keeper):
    Log().quiet("Breaking revision changeset dependency cycles...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN))
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
        DB_OPEN_WRITE)

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        changeset_db, cvs_item_to_changeset_id
        )

    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      if isinstance(changeset, RevisionChangeset):
        self.changeset_graph.add_changeset(changeset)
      max_changeset_id = max(max_changeset_id, changeset.id)

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph, breaking cycles using self.break_cycle():
    for (changeset, time_range) in self.changeset_graph.consume_graph(
          cycle_breaker=self.break_cycle
          ):
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")

class RevisionTopologicalSortPass(Pass):
  """Sort RevisionChangesets into commit order.

  Also convert them to OrderedChangesets, without changing their
  ids."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_REVSORTED_STORE)
    self._register_temp_file(config.CHANGESETS_REVSORTED_INDEX)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)

  def get_source_changesets(self, changeset_db):
    changeset_ids = changeset_db.keys()

    for changeset_id in changeset_ids:
      yield changeset_db[changeset_id]

  def get_changesets(self):
    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
        DB_OPEN_READ,
        )

    changeset_graph = ChangesetGraph(
        changeset_db,
        CVSItemToChangesetTable(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_REVBROKEN
                ),
            DB_OPEN_READ,
            )
        )

    for changeset in self.get_source_changesets(changeset_db):
      if isinstance(changeset, RevisionChangeset):
        changeset_graph.add_changeset(changeset)
      else:
        yield changeset

    changeset_ids = []

    # Sentry:
    changeset_ids.append(None)

    for (changeset, time_range) in changeset_graph.consume_graph():
      changeset_ids.append(changeset.id)

    # Sentry:
    changeset_ids.append(None)
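    # With the sentries in place, changeset_ids now has the form
    # [None, id_0, ..., id_N-1, None], so the loop below can hand
    # every OrderedChangeset a predecessor id and a successor id
    # uniformly; the first and last changesets simply receive None.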
    for i in range(1, len(changeset_ids) - 1):
      changeset = changeset_db[changeset_ids[i]]
      yield OrderedChangeset(
          changeset.id, changeset.cvs_item_ids, i - 1,
          changeset_ids[i - 1], changeset_ids[i + 1])

    changeset_graph.close()

  def run(self, run_options, stats_keeper):
    Log().quiet("Generating CVSRevisions in commit order...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    changesets_revordered_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_NEW)

    for changeset in self.get_changesets():
      changesets_revordered_db.store(changeset)

    changesets_revordered_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")


class BreakSymbolChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only SymbolChangesets."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_STORE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)

  def get_source_changesets(self):
    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def break_cycle(self, cycle):
    """Break up one or more changesets in CYCLE to help break the cycle.

    CYCLE is a list of Changesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle.  Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    self.processed_changeset_logger.flush()
    best_i = None
    best_link = None
    for i in range(len(cycle)):
      # It's OK if this index wraps to -1:
      link = ChangesetGraphLink(
          cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking cycle %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def run(self, run_options, stats_keeper):
    Log().quiet("Breaking symbol changeset dependency cycles...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN))
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        DB_OPEN_WRITE)

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        changeset_db, cvs_item_to_changeset_id
        )

    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      if isinstance(changeset, SymbolChangeset):
        self.changeset_graph.add_changeset(changeset)
      max_changeset_id = max(max_changeset_id, changeset.id)

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph, breaking cycles using self.break_cycle():
    for (changeset, time_range) in self.changeset_graph.consume_graph(
          cycle_breaker=self.break_cycle
          ):
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")

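# A changeset is called "retrograde" in the next pass when, measured
# against the commit order already fixed for the OrderedChangesets,
# its latest predecessor would have to come at or after its earliest
# successor.  Such a changeset can never be scheduled consistently,
# so it is split up before the main graph-consumption loop runs.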

class BreakAllChangesetCyclesPass(Pass):
  """Break up any dependency cycles that are closed by SymbolChangesets."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)

  def get_source_changesets(self):
    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def _split_retrograde_changeset(self, changeset):
    """CHANGESET is retrograde.  Split it into non-retrograde changesets."""

    Log().debug('Breaking retrograde changeset %x' % (changeset.id,))

    self.changeset_graph.delete_changeset(changeset)

    # A map { cvs_branch_id : (max_pred_ordinal, min_succ_ordinal) }
    ordinal_limits = {}
    for cvs_branch in changeset.iter_cvs_items():
      max_pred_ordinal = 0
      min_succ_ordinal = sys.maxint

      for pred_id in cvs_branch.get_pred_ids():
        pred_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[pred_id], 0)
        max_pred_ordinal = max(max_pred_ordinal, pred_ordinal)

      for succ_id in cvs_branch.get_succ_ids():
        succ_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[succ_id], sys.maxint)
        min_succ_ordinal = min(min_succ_ordinal, succ_ordinal)

      assert max_pred_ordinal < min_succ_ordinal
      ordinal_limits[cvs_branch.id] = (max_pred_ordinal, min_succ_ordinal,)

    # Find the earliest successor ordinal:
    min_min_succ_ordinal = sys.maxint
    for (max_pred_ordinal, min_succ_ordinal) in ordinal_limits.values():
      min_min_succ_ordinal = min(min_min_succ_ordinal, min_succ_ordinal)

    early_item_ids = []
    late_item_ids = []
    for (id, (max_pred_ordinal, min_succ_ordinal)) in ordinal_limits.items():
      if max_pred_ordinal >= min_min_succ_ordinal:
        late_item_ids.append(id)
      else:
        early_item_ids.append(id)

    assert early_item_ids
    assert late_item_ids

    early_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), early_item_ids)
    late_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), late_item_ids)

    self.changeset_graph.add_new_changeset(early_changeset)
    self.changeset_graph.add_new_changeset(late_changeset)

    early_split = self._split_if_retrograde(early_changeset.id)

    # Because of the way we constructed it, the early changeset
    # should not have to be split:
    assert not early_split

    self._split_if_retrograde(late_changeset.id)

  def _split_if_retrograde(self, changeset_id):
    node = self.changeset_graph[changeset_id]
    pred_ordinals = [
        self.ordinals[id]
        for id in node.pred_ids
        if id in self.ordinals
        ]
    pred_ordinals.sort()
    succ_ordinals = [
        self.ordinals[id]
        for id in node.succ_ids
        if id in self.ordinals
        ]
    succ_ordinals.sort()
    if pred_ordinals and succ_ordinals \
           and pred_ordinals[-1] >= succ_ordinals[0]:
      self._split_retrograde_changeset(self.changeset_db[node.id])
      return True
    else:
      return False
  def break_segment(self, segment):
    """Break a changeset in SEGMENT[1:-1].

    The range SEGMENT[1:-1] is not empty, and all of the changesets
    in that range are SymbolChangesets."""

    best_i = None
    best_link = None
    for i in range(1, len(segment) - 1):
      link = ChangesetGraphLink(segment[i - 1], segment[i], segment[i + 1])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking segment %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in segment]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def break_cycle(self, cycle):
    """Break up one or more SymbolChangesets in CYCLE to help break
    the cycle.

    CYCLE is a list of SymbolChangesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle.  Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking cycle %s' % (
          ' -> '.join(['%x' % changeset.id
                       for changeset in cycle + [cycle[0]]]),))

    # Unwrap the cycle into a segment then break the segment:
    self.break_segment([cycle[-1]] + cycle + [cycle[0]])

  def run(self, run_options, stats_keeper):
    Log().quiet("Breaking CVSSymbol dependency loops...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN))
    self.cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN),
        DB_OPEN_WRITE)

    self.changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        self.changeset_db, self.cvs_item_to_changeset_id
        )

    # A map {changeset_id : ordinal} for OrderedChangesets:
    self.ordinals = {}
    # A map {ordinal : changeset_id}:
    ordered_changeset_map = {}
    # A list of all BranchChangeset ids:
    branch_changeset_ids = []
    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      self.changeset_db.store(changeset)
      self.changeset_graph.add_changeset(changeset)
      if isinstance(changeset, OrderedChangeset):
        ordered_changeset_map[changeset.ordinal] = changeset.id
        self.ordinals[changeset.id] = changeset.ordinal
      elif isinstance(changeset, BranchChangeset):
        branch_changeset_ids.append(changeset.id)
      max_changeset_id = max(max_changeset_id, changeset.id)

    # An array of ordered_changeset ids, indexed by ordinal:
    ordered_changesets = []
    for ordinal in range(len(ordered_changeset_map)):
      id = ordered_changeset_map[ordinal]
      ordered_changesets.append(id)

    ordered_changeset_ids = set(ordered_changeset_map.values())
    del ordered_changeset_map

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    # First we scan through all BranchChangesets looking for
    # changesets that are individually "retrograde" and splitting
    # those up:
    for changeset_id in branch_changeset_ids:
      self._split_if_retrograde(changeset_id)

    del self.ordinals
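    # The loop below alternates between two modes: first consume any
    # nodes whose predecessors have all been processed (cheap), and
    # when the graph stalls, either break a changeset on a path
    # leading from the next unprocessed OrderedChangeset back to
    # another ordered changeset (break_segment) or, failing that,
    # break a generic cycle (break_cycle) so progress can resume.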
    next_ordered_changeset = 0

    self.processed_changeset_logger = ProcessedChangesetLogger()

    while self.changeset_graph:
      # Consume any nodes that don't have predecessors:
      for (changeset, time_range) \
              in self.changeset_graph.consume_nopred_nodes():
        self.processed_changeset_logger.log(changeset.id)
        if changeset.id in ordered_changeset_ids:
          next_ordered_changeset += 1
          ordered_changeset_ids.remove(changeset.id)

      self.processed_changeset_logger.flush()

      if not self.changeset_graph:
        break

      # Now work on the next ordered changeset that has not yet been
      # processed.  BreakSymbolChangesetCyclesPass has broken any
      # cycles involving only SymbolChangesets, so the presence of a
      # cycle implies that there is at least one ordered changeset
      # left in the graph:
      assert next_ordered_changeset < len(ordered_changesets)

      id = ordered_changesets[next_ordered_changeset]

      path = self.changeset_graph.search_for_path(
          id, ordered_changeset_ids)
      if path:
        if Log().is_on(Log.DEBUG):
          Log().debug('Breaking path from %s to %s' % (path[0], path[-1],))
        self.break_segment(path)
      else:
        # There were no ordered changesets among the reachable
        # predecessors, so do generic cycle-breaking:
        if Log().is_on(Log.DEBUG):
          Log().debug(
              'Breaking generic cycle found from %s'
              % (self.changeset_db[id],)
              )
        self.break_cycle(self.changeset_graph.find_cycle(id))

    del self.processed_changeset_logger
    self.changeset_graph.close()
    self.changeset_graph = None
    self.cvs_item_to_changeset_id = None
    self.changeset_db = None

    Log().quiet("Done")


class TopologicalSortPass(Pass):
  """Sort changesets into commit order."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_SORTED_DATAFILE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)

  def get_source_changesets(self, changeset_db):
    for changeset_id in changeset_db.keys():
      yield changeset_db[changeset_id]

  def get_changesets(self):
    """Generate (changeset, timestamp) pairs in commit order."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ)

    changeset_graph = ChangesetGraph(
        changeset_db,
        CVSItemToChangesetTable(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_ALLBROKEN
                ),
            DB_OPEN_READ,
            ),
        )
    symbol_changeset_ids = set()

    for changeset in self.get_source_changesets(changeset_db):
      changeset_graph.add_changeset(changeset)
      if isinstance(changeset, SymbolChangeset):
        symbol_changeset_ids.add(changeset.id)

    # Ensure a monotonically-increasing timestamp series by keeping
    # track of the previous timestamp and ensuring that the following
    # one is larger.
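    # (Assumed behavior of Timestamper, for illustration: it
    # remembers the last timestamp it handed out and never returns a
    # smaller one, so even if clock skew in the CVS history makes
    # time_range.t_max move backwards, the emitted series of commit
    # timestamps still increases.)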
    timestamper = Timestamper()

    for (changeset, time_range) in changeset_graph.consume_graph():
      timestamp = timestamper.get(
          time_range.t_max, changeset.id in symbol_changeset_ids
          )
      yield (changeset, timestamp)

    changeset_graph.close()

  def run(self, run_options, stats_keeper):
    Log().quiet("Generating CVSRevisions in commit order...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    sorted_changesets = open(
        artifact_manager.get_temp_file(config.CHANGESETS_SORTED_DATAFILE),
        'w')

    for (changeset, timestamp) in self.get_changesets():
      sorted_changesets.write('%x %08x\n' % (changeset.id, timestamp,))

    sorted_changesets.close()

    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")

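# The sorted-changesets file written above is plain text with one
# "changeset_id timestamp" pair per line, both rendered in hex; a
# line might read (made-up values):
#
#     1f4a 49a3c2d0
#
# CreateRevsPass below parses each field back with int(s, 16).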

class CreateRevsPass(Pass):
  """Generate the SVNCommit <-> CVSRevision mapping databases.

  SVNCommitCreator also calls SymbolingsLogger to register
  CVSRevisions that represent an opening or closing for a path on a
  branch or tag.  See SymbolingsLogger for more details.

  This pass was formerly known as pass5."""

  def register_artifacts(self):
    self._register_temp_file(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file(config.SVN_COMMITS_STORE)
    self._register_temp_file(config.CVS_REVS_TO_SVN_REVNUMS)
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CHANGESETS_SORTED_DATAFILE)

  def get_changesets(self):
    """Generate (changeset, timestamp,) tuples in commit order."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ)

    for line in file(
            artifact_manager.get_temp_file(
                config.CHANGESETS_SORTED_DATAFILE)):
      [changeset_id, timestamp] = \
          [int(s, 16) for s in line.strip().split()]
      yield (changeset_db[changeset_id], timestamp)

    changeset_db.close()

  def get_svn_commits(self, creator):
    """Generate the SVNCommits, in order."""

    for (changeset, timestamp) in self.get_changesets():
      for svn_commit in creator.process_changeset(changeset, timestamp):
        yield svn_commit

  def log_svn_commit(self, svn_commit):
    """Output information about SVN_COMMIT."""

    Log().normal(
        'Creating Subversion r%d (%s)'
        % (svn_commit.revnum, svn_commit.get_description(),)
        )

    if isinstance(svn_commit, SVNRevisionCommit):
      for cvs_rev in svn_commit.cvs_revs:
        Log().verbose(' %s %s' % (cvs_rev.cvs_path, cvs_rev.rev,))

  def run(self, run_options, stats_keeper):
    Log().quiet("Mapping CVS revisions to Subversion commits...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    Ctx()._symbolings_logger = SymbolingsLogger()

    persistence_manager = PersistenceManager(DB_OPEN_NEW)

    creator = SVNCommitCreator()
    for svn_commit in self.get_svn_commits(creator):
      self.log_svn_commit(svn_commit)
      persistence_manager.put_svn_commit(svn_commit)

    stats_keeper.set_svn_rev_count(creator.revnum_generator.get_last_id())
    del creator

    persistence_manager.close()
    Ctx()._symbolings_logger.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")


class SortSymbolsPass(Pass):
  """This pass was formerly known as pass6."""

  def register_artifacts(self):
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS)

  def run(self, run_options, stats_keeper):
    Log().quiet("Sorting symbolic name source revisions...")
    sort_file(
        artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS),
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
        options=['-k', '1,1', '-k', '2,2n', '-k', '3'],
        )
    Log().quiet("Done")


class IndexSymbolsPass(Pass):
  """This pass was formerly known as pass7."""

  def register_artifacts(self):
    self._register_temp_file(config.SYMBOL_OFFSETS_DB)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)

  def generate_offsets_for_symbolings(self):
    """Iterate through all the lines in SYMBOL_OPENINGS_CLOSINGS_SORTED,
    writing out a file mapping SYMBOLIC_NAME to the file offset in
    SYMBOL_OPENINGS_CLOSINGS_SORTED where SYMBOLIC_NAME is first
    encountered.  This will allow us to seek to the various offsets
    in the file and sequentially read only the openings and closings
    that we need."""

    offsets = {}

    f = open(
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
        'r')
    old_id = None
    while True:
      fpos = f.tell()
      line = f.readline()
      if not line:
        break
      id, svn_revnum, ignored = line.split(" ", 2)
      id = int(id, 16)
      if id != old_id:
        Log().verbose(' ', Ctx()._symbol_db.get_symbol(id).name)
        old_id = id
        offsets[id] = fpos
    f.close()

    offsets_db = file(
        artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'wb')
    cPickle.dump(offsets, offsets_db, -1)
    offsets_db.close()

  def run(self, run_options, stats_keeper):
    Log().quiet("Determining offsets for all symbolic names...")
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._symbol_db = SymbolDatabase()
    self.generate_offsets_for_symbolings()
    Ctx()._symbol_db.close()
    Log().quiet("Done.")

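# The offsets database written above is simply a pickled dictionary
# mapping symbol id to the byte offset of the symbol's first line in
# SYMBOL_OPENINGS_CLOSINGS_SORTED, e.g. (made-up values)
# {0x1a2 : 0, 0x1a3 : 4096}.  A later consumer can therefore seek()
# directly to the openings and closings of a single symbol and read
# them sequentially.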

class OutputPass(Pass):
  """This pass was formerly known as pass8."""

  def register_artifacts(self):
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.METADATA_CLEAN_INDEX_TABLE)
    self._register_temp_file_needed(config.METADATA_CLEAN_STORE)
    self._register_temp_file_needed(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file_needed(config.SVN_COMMITS_STORE)
    self._register_temp_file_needed(config.CVS_REVS_TO_SVN_REVNUMS)
    Ctx().output_option.register_artifacts(self)

  def get_svn_commits(self):
    """Generate the SVNCommits in commit order."""

    persistence_manager = PersistenceManager(DB_OPEN_READ)

    svn_revnum = 1  # The first non-trivial commit

    # Peek at the first revision to find the date to use to
    # initialize the repository:
    svn_commit = persistence_manager.get_svn_commit(svn_revnum)

    while svn_commit:
      yield svn_commit
      svn_revnum += 1
      svn_commit = persistence_manager.get_svn_commit(svn_revnum)

    persistence_manager.close()

  def run(self, run_options, stats_keeper):
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._metadata_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
        artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
        DB_OPEN_READ,
        )
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()

    Ctx().output_option.setup(stats_keeper.svn_rev_count())

    for svn_commit in self.get_svn_commits():
      svn_commit.output(Ctx().output_option)

    Ctx().output_option.cleanup()

    Ctx()._symbol_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._metadata_db.close()
    Ctx()._cvs_file_db.close()


# The list of passes constituting a run of cvs2svn:
passes = [
    CollectRevsPass(),
    CleanMetadataPass(),
    CollateSymbolsPass(),
    #CheckItemStoreDependenciesPass(config.CVS_ITEMS_STORE),
    FilterSymbolsPass(),
    SortRevisionSummaryPass(),
    SortSymbolSummaryPass(),
    InitializeChangesetsPass(),
    #CheckIndexedItemStoreDependenciesPass(
    #    config.CVS_ITEMS_SORTED_STORE,
    #    config.CVS_ITEMS_SORTED_INDEX_TABLE),
    BreakRevisionChangesetCyclesPass(),
    RevisionTopologicalSortPass(),
    BreakSymbolChangesetCyclesPass(),
    BreakAllChangesetCyclesPass(),
    TopologicalSortPass(),
    CreateRevsPass(),
    SortSymbolsPass(),
    IndexSymbolsPass(),
    OutputPass(),
    ]