.. Copyright (C) 2001-2010 NLTK Project .. For license information, see LICENSE.TXT =================== Dependency Grammars =================== >>> from nltk.grammar import * >>> from nltk.parse import * CoNLL Data ---------- >>> treebank_data = """Pierre NNP 2 NMOD ... Vinken NNP 8 SUB ... , , 2 P ... 61 CD 5 NMOD ... years NNS 6 AMOD ... old JJ 2 NMOD ... , , 2 P ... will MD 0 ROOT ... join VB 8 VC ... the DT 11 NMOD ... board NN 9 OBJ ... as IN 9 VMOD ... a DT 15 NMOD ... nonexecutive JJ 15 NMOD ... director NN 12 PMOD ... Nov. NNP 9 VMOD ... 29 CD 16 NMOD ... . . 9 VMOD ... """ >>> dg = DependencyGraph(treebank_data) >>> print dg.tree().pprint() (will (Vinken Pierre , (old (years 61)) ,) (join (board the) (as (director a nonexecutive)) (Nov. 29) .)) The next example uses the dependency-parsed version of the Penn Treebank corpus sample. >>> from nltk.corpus import dependency_treebank >>> t = dependency_treebank.parsed_sents()[0] >>> print t.to_conll(3) # doctest: +NORMALIZE_WHITESPACE Pierre NNP 2 Vinken NNP 8 , , 2 61 CD 5 years NNS 6 old JJ 2 , , 2 will MD 0 join VB 8 the DT 11 board NN 9 as IN 9 a DT 15 nonexecutive JJ 15 director NN 12 Nov. NNP 9 29 CD 16 . . 8 Projective Dependency Parsing ----------------------------- >>> grammar = parse_dependency_grammar(""" ... 'fell' -> 'price' | 'stock' ... 'price' -> 'of' 'the' ... 'of' -> 'stock' ... 'stock' -> 'the' ... """) >>> print grammar Dependency grammar with 5 productions 'fell' -> 'price' 'fell' -> 'stock' 'price' -> 'of' 'the' 'of' -> 'stock' 'stock' -> 'the' >>> dp = ProjectiveDependencyParser(grammar) >>> for t in dp.parse(['the', 'price', 'of', 'the', 'stock', 'fell']): ... print t (fell (price the of the) stock) (fell (price the of) (stock the)) (fell (price the (of (stock the)))) Non-Projective Dependency Parsing --------------------------------- >>> grammar = parse_dependency_grammar(""" ... 'taught' -> 'play' | 'man' ... 'man' -> 'the' ... 'play' -> 'golf' | 'dog' | 'to' ... 'dog' -> 'his' ... 
""") >>> print grammar Dependency grammar with 7 productions 'taught' -> 'play' 'taught' -> 'man' 'man' -> 'the' 'play' -> 'golf' 'play' -> 'dog' 'play' -> 'to' 'dog' -> 'his' >>> dp = NonprojectiveDependencyParser(grammar) >>> for g in dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf']): ... print g [{'address': 0, 'deps': 3, 'rel': 'TOP', 'tag': 'TOP', 'word': None}, {'address': 1, 'deps': [], 'word': 'the'}, {'address': 2, 'deps': [1], 'word': 'man'}, {'address': 3, 'deps': [2, 7], 'word': 'taught'}, {'address': 4, 'deps': [], 'word': 'his'}, {'address': 5, 'deps': [4], 'word': 'dog'}, {'address': 6, 'deps': [], 'word': 'to'}, {'address': 7, 'deps': [5, 6, 8], 'word': 'play'}, {'address': 8, 'deps': [], 'word': 'golf'}]