.. Copyright (C) 2001-2010 NLTK Project .. For license information, see LICENSE.TXT ------------------------------------------- Unit tests for the treetransforms module ------------------------------------------- >>> from copy import deepcopy >>> from nltk.tree import * >>> from nltk.treetransforms import * >>> sentence = "(TOP (S (S (VP (VBN Turned) (ADVP (RB loose)) (PP (IN in) (NP (NP (NNP Shane) (NNP Longman) (POS 's)) (NN trading) (NN room))))) (, ,) (NP (DT the) (NN yuppie) (NNS dealers)) (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))) (. .)))" >>> tree = bracket_parse(sentence) >>> print tree (TOP (S (S (VP (VBN Turned) (ADVP (RB loose)) (PP (IN in) (NP (NP (NNP Shane) (NNP Longman) (POS 's)) (NN trading) (NN room))))) (, ,) (NP (DT the) (NN yuppie) (NNS dealers)) (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))) (. .))) Make a copy of the original tree and collapse the subtrees with only one child. >>> collapsedTree = deepcopy(tree) >>> collapse_unary(collapsedTree) >>> print collapsedTree (TOP (S (S+VP (VBN Turned) (ADVP (RB loose)) (PP (IN in) (NP (NP (NNP Shane) (NNP Longman) (POS 's)) (NN trading) (NN room)))) (, ,) (NP (DT the) (NN yuppie) (NNS dealers)) (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))) (. .))) >>> collapsedTree2 = deepcopy(tree) >>> collapse_unary(collapsedTree2, collapsePOS=True, collapseRoot=True) >>> print collapsedTree2 (TOP+S (S+VP (VBN Turned) (ADVP+RB loose) (PP (IN in) (NP (NP (NNP Shane) (NNP Longman) (POS 's)) (NN trading) (NN room)))) (, ,) (NP (DT the) (NN yuppie) (NNS dealers)) (VP (AUX do) (NP (NP+RB little) (ADJP+RB right))) (. .)) Convert the tree to Chomsky Normal Form, i.e., each subtree has either two subtree children or a single leaf value. This conversion can be performed using either left- or right-factoring.
>>> cnfTree = deepcopy(collapsedTree) >>> chomsky_normal_form(cnfTree, factor='left') >>> print cnfTree (TOP (S (S|<S+VP-,-NP-VP> (S|<S+VP-,-NP> (S|<S+VP-,> (S+VP (S+VP|<VBN-ADVP> (VBN Turned) (ADVP (RB loose))) (PP (IN in) (NP (NP|<NP-NN> (NP (NP|<NNP-NNP> (NNP Shane) (NNP Longman)) (POS 's)) (NN trading)) (NN room)))) (, ,)) (NP (NP|<DT-NN> (DT the) (NN yuppie)) (NNS dealers))) (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))) (. .))) >>> cnfTree = deepcopy(collapsedTree) >>> chomsky_normal_form(cnfTree, factor='right') >>> print cnfTree (TOP (S (S+VP (VBN Turned) (S+VP|<ADVP-PP> (ADVP (RB loose)) (PP (IN in) (NP (NP (NNP Shane) (NP|<NNP-POS> (NNP Longman) (POS 's))) (NP|<NN-NN> (NN trading) (NN room)))))) (S|<,-NP-VP-.> (, ,) (S|<NP-VP-.> (NP (DT the) (NP|<NN-NNS> (NN yuppie) (NNS dealers))) (S|<VP-.> (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))) (. .)))))) Employ some Markov smoothing to make the artificial node labels a bit more readable. See the treetransforms.py documentation for more details. >>> markovTree = deepcopy(collapsedTree) >>> chomsky_normal_form(markovTree, horzMarkov=2, vertMarkov=1) >>> print markovTree (TOP (S^<TOP> (S+VP^<S> (VBN Turned) (S+VP|<ADVP-PP>^<S> (ADVP^<S+VP> (RB loose)) (PP^<S+VP> (IN in) (NP^<PP> (NP^<NP> (NNP Shane) (NP|<NNP-POS>^<NP> (NNP Longman) (POS 's))) (NP|<NN-NN>^<PP> (NN trading) (NN room)))))) (S|<,-NP>^<TOP> (, ,) (S|<NP-VP>^<TOP> (NP^<S> (DT the) (NP|<NN-NNS>^<S> (NN yuppie) (NNS dealers))) (S|<VP-.>^<TOP> (VP^<S> (AUX do) (NP^<VP> (NP^<NP> (RB little)) (ADJP^<NP> (RB right)))) (. .)))))) Convert the transformed tree back to its original form. >>> un_chomsky_normal_form(markovTree) >>> tree == markovTree True