/*
 *  cr.jape
 *
 *  Copyright (c) 1998-2001, The University of Sheffield.
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Diana Maynard, 10 Sep 2001
 *
 *  $Id: find.jape,v 1.2 2006/04/05 17:02:48 nirajaswani Exp $
 */

// Phase "find": marks candidate sentence boundaries as Split annotations.
//   - Split.kind = "internal": sentence-final punctuation (rule Split1).
//   - Split.kind = "external": a run of two or more line breaks (rule CR).
// The rules bound to :fake have an empty right-hand side and therefore
// create no annotations themselves.
//
// This file must be stored and loaded as UTF-8: the FULLSTOP macro contains
// the Devanagari danda (U+0964).

Phase: find
Input: Token SpaceToken Lookup
// NOTE(review): with control = all every matching rule fires, so the
// empty-RHS "fake" rules below cannot prevent Split1 from firing on the
// same full stop. Confirm whether "appelt" was intended here.
Options: control = all

// A sentence-final full stop: either the ASCII period or the Devanagari
// danda.
Macro: FULLSTOP
(
  ({Token.string == "."} | {Token.string == "।"})
)

// An ellipsis written as three consecutive period tokens.
Macro: THREEDOTS
(
  {Token.string == "."}
  {Token.string == "."}
  {Token.string == "."}
)

// Exclamation mark or question mark.
Macro: PUNCT
(
  ({Token.string == "!"} |
   {Token.string == "?"}
  )
)

// An ellipsis followed by one or more further full stops, optionally
// preceded by a single token. Produces no annotation.
Rule: Ldots
(
  ({Token})?
  THREEDOTS
  (FULLSTOP)+
):fake
-->
{}

// Sentence-final punctuation, optionally followed by a closing double
// quote, yields an "internal" Split covering the whole match.
Rule: Split1
(
  (PUNCT | FULLSTOP | THREEDOTS)
  ({Token.string == "\""})?
):split
-->
:split.Split = {kind = "internal"}

//
Rule: CR
// must be at least 2 CRs or Newlines plus optional spaces to generate a split
// NOTE(review): the two-token alternative matches "\n" followed by "\r",
// which is the reverse of the usual CRLF order - confirm against the
// tokeniser's output before changing it.
(
  ({SpaceToken.string == "\n"} |
   ({SpaceToken.string == "\n"} {SpaceToken.string == "\r"}))
  ({SpaceToken.string == "\n"} |
   ({SpaceToken.string == "\n"} {SpaceToken.string == "\r"}))+
  ({SpaceToken.kind == space})*
):cr
-->
:cr.Split = {kind = "external"}

// One or more "<token> ." pairs immediately followed by a word or number
// token, i.e. periods that sit inside a larger expression. Produces no
// annotation.
Rule: Fake
(
  ({Token} {Token.string == "."})+
  ({Token.kind == word} | {Token.kind == number})
):fake
-->
{}

// A gazetteer Lookup with majorType "abbreviation" followed by a period.
// Produces no annotation.
Rule: Abbrev1
(
  {Lookup.majorType == "abbreviation"}
  {Token.string == "."}
):fake
-->
{}

// One or more single-character upperInitial tokens, each followed by a
// full stop (e.g. initials). Produces no annotation.
Rule: Abbrev2
(
  {Token.orth == "upperInitial", Token.length == "1"}
  FULLSTOP
)+
:fake
-->
{}