# Natural Language Toolkit: Dispersion Plots
#
# Copyright (C) 2001-2010 NLTK Project
# Author: Steven Bird
# URL:
# For license information, see LICENSE.TXT

"""
A utility for displaying lexical dispersion.
"""


def dispersion_plot(text, words):
    """
    Generate a lexical dispersion plot.

    Each target word gets its own row on the y axis; a tick mark is
    drawn at every offset in the text where that word occurs.  The
    rows are laid out in reverse order of C{words}, so the first
    target word appears at the top of the plot.  The plot is shown
    with C{pylab.show()}.

    The caller's C{words} list is not modified.

    @param text: The source text
    @type text: C{list} or C{enum} of C{str}
    @param words: The target words
    @type words: C{list} of C{str}
    @raise ValueError: If C{pylab} (matplotlib) cannot be imported.
    """
    try:
        import pylab
    except ImportError:
        raise ValueError('The plot function requires the matplotlib package. '
                         'See http://matplotlib.sourceforge.net/')

    # Reverse a private copy: calling words.reverse() directly would
    # leave the caller's list reversed after every call.
    words = list(words)
    words.reverse()

    # Map each target word to the row(s) it occupies.  A word listed
    # more than once keeps one row per listing.
    rows = {}
    for y, word in enumerate(words):
        rows.setdefault(word, []).append(y)

    # One (offset, row) point per occurrence of a target word.
    points = [(x, y)
              for x, token in enumerate(text)
              for y in rows.get(token, ())]

    if points:
        x, y = zip(*points)
    else:
        # zip(*[]) yields nothing to unpack, so plot empty series.
        x = y = ()

    pylab.plot(x, y, "b|", scalex=.1)
    pylab.yticks(range(len(words)), words, color="b")
    # Leave half a row of padding above and below the word rows.
    pylab.ylim(-1, len(words))
    pylab.title("Lexical Dispersion Plot")
    pylab.xlabel("Word Offset")
    pylab.show()


if __name__ == '__main__':
    from nltk.corpus import gutenberg
    words = ['Elinor', 'Marianne', 'Edward', 'Willoughby']
    dispersion_plot(gutenberg.words('austen-sense.txt'), words)