/*
 *  name.jape
 *
 *  Copyright (c) 1998-2001, The University of Sheffield.
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Diana Maynard, 10 Sep 2001
 *
 *  $Id: name_old.jape,v 1.1 2004/09/10 12:41:20 akshay Exp $
 */

// Single-phase grammar that creates temporary named-entity annotations
// (TempPerson, TempOrganization, TempLocation, TempFacility, JobTitle) and
// "Spur"/"Temp" blocking annotations from Token, Lookup, Title, FirstPerson
// and Upper annotations. Rules compete in appelt mode, so where several rules
// match, the Priority value is what separates them.

Phase: Name
Input: Token Lookup Title FirstPerson Split Upper Break
Options: control = appelt

///////////////////////////////////////////////////////////////
// Person Rules
/////////////////////////////////////////////////////////////////

// An opening or closing quotation mark: a double quote, two backticks,
// or two single quotes.
Macro: QUOTE
(
 {Token.kind=="punctuation",Token.string == "\""} |
 (
  {Token.kind=="symbol",Token.string == "`"}
  {Token.kind=="symbol",Token.string == "`"}
 ) |
 (
  {Token.kind=="punctuation",Token.string == "'"}
  {Token.kind=="punctuation",Token.string == "'"}
 )
)

// One or two Title annotations, each optionally followed by a full stop.
Macro: TITLE
(
 {Title}
 ({Token.string == "."})?
 ({Title})?
 ({Token.string == "."})?
)

// One or more single upper-initial letters, each optionally followed by
// a full stop.
Macro: INITIALS
(
 ({Token.orth == upperInitial, Token.length =="1"}
  ({Token.string == "."})?
 )+
)

// A single all-caps token of length 2 or 3.
Macro: INITIALS2
(
 {Token.orth == allCaps, Token.length == "2"} |
 {Token.orth == allCaps, Token.length == "3"}
)

// A FirstPerson annotation with a male or female gender, or initials.
Macro: FIRSTNAME
(
 ({FirstPerson.gender == male} |
  {FirstPerson.gender == female})
 |
 (INITIALS)
)

// A gazetteer first name flagged as ambiguous.
Macro: FIRSTNAMEAMBIG
(
 {Lookup.majorType == person_first, Lookup.minorType == ambig}
)

Macro: PERSONENDING
(
 {Lookup.majorType == person_ending}
)

// A surname prefix: a gazetteer prefix, "Al-"/"al-", or one or more
// occurrences of O' / D'.
Macro: PREFIX
(
 {Lookup.majorType == surname, Lookup.minorType == prefix} |
 {Token.string == "Al-"}|{Token.string == "al-"}|
 (({Token.string == "O"}|{Token.string == "D"})
  {Token.string == "'"}
 )+
)

// A/a, The/the or One/one.
Macro: DET
(
 ({Token.string == "A"} | {Token.string == "a"} |
  {Token.string == "The"} |{Token.string == "the"}|
  {Token.string == "One"} |{Token.string == "one"})
)

///////////////////////////////////////////////////////////
// Person Rules

Rule: Pronoun
Priority: 1000
//stops personal pronouns being recognised as Initials
(
 {Token.category == PP}|
 {Token.category == PRP}|
 {Token.category == RB}
):pro
-->
{}

Rule: NotPersonFirst
Priority: 1000
// stops modal verbs being recognised as FirstNames
(
 {Token.category == MD}
):spur
-->
:spur.Spur = {}

// A full person name taken directly from the gazetteer.
Rule: GazPerson
Priority: 50
(
 {Lookup.majorType == person_full, Lookup.minorType == normal}
)
:person
-->
{
  gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
  gate.FeatureMap features = Factory.newFeatureMap();
  features.put("kind", "personName");
  features.put("rule", "GazPerson");
  annotations.add(person.firstNode(), person.lastNode(), "TempPerson",
                  features);
}

// A (possibly titled) first name on its own. The optional leading
// determiner/pronoun/adverb and the optional trailing single capital letter
// are matched as context but are not part of the TempPerson span.
Rule: GazPersonFirst
Priority: 70
(
 {Token.category == DT}|
 {Token.category == PRP}|
 {Token.category == RB}
)?
(
 (TITLE)?
 {FirstPerson}
)
:person
(
 {Token.orth == upperInitial, Token.length == "1"}
)?
-->
{
  gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
  gate.FeatureMap features = Factory.newFeatureMap();
  // The binding may also hold Title and Token annotations, so the gender
  // must be read from the FirstPerson annotation specifically rather than
  // from whichever annotation the set iterator happens to return first.
  gate.AnnotationSet firstPerson = (gate.AnnotationSet)person.get("FirstPerson");
  if (firstPerson != null && firstPerson.size()>0)
  {
    gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
    features.put("gender", personAnn.getFeatures().get("gender"));
  }
  features.put("kind", "personName");
  features.put("rule", "GazPersonFirst");
  annotations.add(person.firstNode(), person.lastNode(), "TempPerson",
                  features);
}

Rule: PersonFirstContext2
Priority: 40
// Anne and I
(
 {FirstPerson}
):person
(
 {Token.string == "and"}
 {Token.length == "1"}
)
-->
{
  gate.FeatureMap features = Factory.newFeatureMap();
  gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
  gate.AnnotationSet firstPerson = (gate.AnnotationSet)personSet.get("FirstPerson");
  // propagate the gender of the first name, if there is one
  if (firstPerson != null && firstPerson.size()>0)
  {
    gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
    features.put("gender", personAnn.getFeatures().get("gender"));
  }
  features.put("kind", "personName");
  features.put("rule", "PersonFirstContext2");
  annotations.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
                  features);
}

Rule: TitleOnly
Priority: 100
// Deputy Prime Minister
(
 (TITLE)
):spur
-->
:spur.Spur = {}

// Title + organization + job title is marked as Spur rather than a person.
Rule: NotPersonTitle
Priority: 100
(
 (TITLE)
 {Lookup.majorType == organization}
 {Lookup.majorType == jobtitle}
)
:spur -->
 :spur.Spur = {rule = NotPersonTitle}

// Title + spur Lookup + Title is marked as Spur rather than a person.
Rule: NotPersonTitle2
Priority: 100
(
 (TITLE)
 {Lookup.majorType == spur}
 (TITLE)
)
:spur -->
 :spur.Spur = {rule = NotPersonTitle2}

Rule: PersonTitle
Priority: 35
// Mr. Jones
// Mr Fred Jones
// note we only allow one first and surname,
// but we can add more in a final phase if we find adjacent unknowns
(
 (TITLE)
 (FIRSTNAME | FIRSTNAMEAMBIG | INITIALS2)*
 (PREFIX)?
 ({Upper})
 ({Upper})?
 (PERSONENDING)?
)
:person -->
{
  gate.FeatureMap features = Factory.newFeatureMap();
  gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");

  // get all Title annotations that have a gender feature
  HashSet fNames = new HashSet();
  fNames.add("gender");
  gate.AnnotationSet personTitle = personSet.get("Title", fNames);

  // if the gender feature exists on a Title, use it
  if (personTitle != null && personTitle.size()>0)
  {
    gate.Annotation personAnn = (gate.Annotation)personTitle.iterator().next();
    features.put("gender", personAnn.getFeatures().get("gender"));
  }
  else
  {
    // otherwise fall back to a FirstPerson annotation that has a gender
    gate.AnnotationSet firstPerson = personSet.get("FirstPerson", fNames);
    if (firstPerson != null && firstPerson.size()>0)
    {
      gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
      features.put("gender", personAnn.getFeatures().get("gender"));
    }
  }
  features.put("kind", "personName");
  features.put("rule", "PersonTitle");
  annotations.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
                  features);
}

Rule: PersonFirstTitleGender
Priority: 55
// use this rule when we know what gender the title indicates
// Mr Fred
(
 ({Title.gender == male} | {Title.gender == female})
 ((FIRSTNAME | FIRSTNAMEAMBIG | INITIALS2)
 )
)
:person -->
{
  gate.FeatureMap features = Factory.newFeatureMap();
  gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
  gate.AnnotationSet title = (gate.AnnotationSet)personSet.get("Title");
  // propagate the gender carried by the title
  if (title != null && title.size()>0)
  {
    gate.Annotation personAnn = (gate.Annotation)title.iterator().next();
    features.put("gender", personAnn.getFeatures().get("gender"));
  }
  features.put("kind", "personName");
  features.put("rule", "PersonFirstTitleGender");
  annotations.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
                  features);
}

Rule: PersonTitleGender
Priority: 18
// use this rule if the title has a feature gender
// Miss F Smith
(
 ({Title.gender == male}|
  {Title.gender == female}
 )
 ((FIRSTNAME | FIRSTNAMEAMBIG | INITIALS2)
 )*
 ({Upper})
 (PERSONENDING)?
)
:person -->
{
  gate.FeatureMap features = Factory.newFeatureMap();
  gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
  gate.AnnotationSet title = (gate.AnnotationSet)personSet.get("Title");
  // if the annotation type title doesn't exist, do nothing
  if (title != null && title.size()>0)
  {
    // if it does exist, take the first element in the set
    gate.Annotation personAnn = (gate.Annotation)title.iterator().next();
    //propagate gender feature (and value) from title
    features.put("gender", personAnn.getFeatures().get("gender"));
  }
  // create some new features
  features.put("kind", "personName");
  features.put("rule", "PersonTitleGender");
  // create a TempPerson annotation and add the features we've created
  annotations.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
                  features);
}

Rule: PersonJobTitle
Priority: 20
// note we include titles but not jobtitles in markup
(
 {Lookup.majorType == jobtitle}
):jobtitle
(
 (TITLE)?
 (FIRSTNAME | FIRSTNAMEAMBIG | INITIALS2)+
 (PREFIX)*
 ({Upper})
 (PERSONENDING)?
)
:person
-->
 :person.TempPerson = {kind = "personName", rule = "PersonJobTitle"},
 :jobtitle.JobTitle = {rule = "PersonJobTitle"}

Rule: FirstPersonStop
Priority: 20
// surname contains stop words
// e.g. Mary And
(
 (FIRSTNAME)+
):person
(
 (
  ({Lookup.majorType == stop}|
   {Token.category == DT})
 )
)
-->
{
  gate.FeatureMap features = Factory.newFeatureMap();
  gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
  gate.AnnotationSet firstPerson = (gate.AnnotationSet)personSet.get("FirstPerson");
  // FIRSTNAME may have matched initials only, in which case there is no
  // FirstPerson annotation and no gender is recorded
  if (firstPerson != null && firstPerson.size()>0)
  {
    gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
    features.put("gender", personAnn.getFeatures().get("gender"));
  }
  features.put("kind", "personName");
  Double num = new Double((double)20);
  features.put("confidence", num);
  features.put("rule", "FirstPersonStop");
  annotations.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
                  features);
}

Rule: NotFirstPersonStop
Priority: 50
// ambig first name and surname is stop word
// e.g. Will And
(
 ((FIRSTNAMEAMBIG)+ |
  {Token.category == PRP}|
  {Token.category == DT}
 )
 ({Lookup.majorType == stop}
 )
)
:person -->
 :person.Spur = {rule = "NotFirstPersonStop"}

Rule: NotPersonFull
Priority: 50
// Det + Surname
(
 {Token.category == DT}|
 {Token.category == PRP}|
 {Token.category == RB}
)
(
 (PREFIX)*
 ({Upper})
 (PERSONENDING)?
):foo
-->
:foo.Spur = {rule = "NotPersonFull"}

Rule: NotPersonFull2
Priority: 50
// don't recognise INITIALS + Company/Org ending
(
 {Token.category == DT}
)?
(
 ((INITIALS)|(INITIALS2))
 ({Lookup.majorType == org_base}|
  {Lookup.majorType == govern_key}|
  {Lookup.majorType == org_key})
):spur
-->
:spur.Spur = {rule = NotPersonFull2}

// Named FirstPersonStop2 so that rule names stay unique within the phase
// (an earlier rule is already called FirstPersonStop); the "rule" feature
// now reports this rule instead of the copied "GazPersonFirst" label.
Rule: FirstPersonStop2
Priority: 50
// John And
(
 {FirstPerson}
):person
(
 {Token.category == DT}|
 {Token.category == PRP}|
 {Token.category == RB}|
 {Token.category == IN}
)
-->
{
  gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
  // the binding holds only the matched FirstPerson annotation
  gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
  gate.FeatureMap features = Factory.newFeatureMap();
  features.put("gender", personAnn.getFeatures().get("gender"));
  features.put("kind", "personName");
  features.put("rule", "FirstPersonStop2");
  annotations.add(person.firstNode(), person.lastNode(), "TempPerson",
                  features);
}

Rule: LocPersonAmbig
Priority: 50
// Location + Surname
// only the location part is annotated; :foo is matched but unused
(
 {Lookup.majorType == location}
):loc
(
 (PREFIX)*
 ({Upper})
 (PERSONENDING)?
):foo
-->
:loc.TempLocation = {kind = "locName", rule = LocPersonAmbig}

Rule: PersonFull
Priority: 10
// F.W. Jones
// Fred Jones
(
 {Token.category == DT}
)?
(
 ((FIRSTNAME | FIRSTNAMEAMBIG)
 )+
 (PREFIX)*
 {Upper}
 (PERSONENDING)?
)
:person -->
{
  gate.FeatureMap features = Factory.newFeatureMap();
  gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");

  //get all firstPerson annotations that have a gender feature
  HashSet fNames = new HashSet();
  fNames.add("gender");
  gate.AnnotationSet firstPerson = personSet.get("FirstPerson", fNames);
  if (firstPerson != null && firstPerson.size()>0)
  {
    gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
    features.put("gender", personAnn.getFeatures().get("gender"));
  }
  features.put("kind", "personName");
  features.put("rule", "PersonFull");
  annotations.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
                  features);
}

Rule: PersonFullStop
Priority: 50
// G.Wilson Fri
(
 ((FIRSTNAME | FIRSTNAMEAMBIG) )
 (PREFIX)*
 ({Upper})
):person
(
 {Lookup.majorType == date}
)
-->
 :person.TempPerson = {kind = "personName", rule = "PersonFullStop"}

Rule: NotPersonFullReverse
Priority: 20
// XYZ, I
(
 ({Upper})
 {Token.string == ","}
 {Token.category == PRP}
 (PERSONENDING)?
)
:unknown
-->
 :unknown.Spur = {rule = "NotPersonFullReverse"}

Rule: PersonFullReverse
Priority: 5
// Jones, F.W.
// don't allow Jones, Fred because too ambiguous
// Smith, TF
(
 {Token.category ==NNP}
 {Token.string == ","}
 (INITIALS )+
 (PERSONENDING)?
)
:person -->
{
  gate.FeatureMap features = Factory.newFeatureMap();
  gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
  // NOTE(review): the pattern above never matches a FirstPerson annotation,
  // so this lookup presumably always comes back empty - confirm
  gate.AnnotationSet firstPerson = (gate.AnnotationSet)personSet.get("FirstPerson");
  if (firstPerson != null && firstPerson.size()>0)
  {
    gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
    features.put("gender", personAnn.getFeatures().get("gender"));
  }
  features.put("kind", "personName");
  features.put("rule", "PersonFullReverse");
  annotations.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
                  features);
}

Rule: PersonSaint
Priority: 50
// Note: ensure that it's not a Saints Day first
(
 ({Token.string == "St"} ({Token.string == "."})? |
  {Token.string == "Saint"})
 (FIRSTNAME)
)
:person -->
{
  gate.FeatureMap features = Factory.newFeatureMap();
  gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
  gate.AnnotationSet firstPerson = (gate.AnnotationSet)personSet.get("FirstPerson");
  // propagate the gender of the first name, if there is one
  if (firstPerson != null && firstPerson.size()>0)
  {
    gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
    features.put("gender", personAnn.getFeatures().get("gender"));
  }
  features.put("kind", "personName");
  features.put("rule", "PersonSaint");
  annotations.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
                  features);
}

Rule: PersonLocAmbig
Priority: 40
// Ken London
// Susan Hampshire
// Christian name + Location --> Person's Name
(
 ({Lookup.majorType == person_first} |
  (INITIALS {Token.string == "."})
 )
 {Lookup.majorType == location}
)
:person -->
{
  gate.FeatureMap features = Factory.newFeatureMap();
  gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
  // NOTE(review): the pattern matches Lookup/Token annotations, not
  // FirstPerson, so this lookup presumably always comes back empty - confirm
  gate.AnnotationSet firstPerson = (gate.AnnotationSet)personSet.get("FirstPerson");
  if (firstPerson != null && firstPerson.size()>0)
  {
    gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
    features.put("gender", personAnn.getFeatures().get("gender"));
  }
  features.put("kind", "personName");
  features.put("rule", "PersonLocAmbig");
  annotations.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
                  features);
}

Rule: OrgTitlePerson
Priority: 51
// Org + Title + Person
// if Org + Title is recognised as Person, change to Org + Title
(
 {Lookup.majorType == organization}
 {Title}
)
(
 (PREFIX)*
 ({Upper})+
 (PERSONENDING)?
):person
-->
{
  gate.FeatureMap features = Factory.newFeatureMap();
  gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");

  //get all firstPerson annotations that have a gender feature
  // NOTE(review): the :person pattern contains no FirstPerson element, so
  // this lookup presumably always comes back empty - confirm
  HashSet fNames = new HashSet();
  fNames.add("gender");
  gate.AnnotationSet firstPerson = personSet.get("FirstPerson", fNames);
  if (firstPerson != null && firstPerson.size()>0)
  {
    gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
    features.put("gender", personAnn.getFeatures().get("gender"));
  }
  features.put("kind", "personName");
  features.put("rule", "OrgTitlePerson");
  annotations.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
                  features);
}

// Determiner + optional first name/initials + Upper + organization key
// is annotated as an organization (the determiner is excluded).
Rule: ThePersonOrgAmbig
Priority: 60
(
 {Token.category == DT}
)
(
 (FIRSTNAME|INITIALS|INITIALS2)?
 ({Upper})
 ({Lookup.majorType == org_base}|
  {Lookup.majorType == govern_key}|
  {Lookup.majorType == org_key})
):spur
-->
:spur.TempOrganization = {rule = ThePersonOrgAmbig}

Rule: PersonOrgAmbig
Priority: 50
// full name + Company/Org ending is Org not Person
(
 ({FirstPerson.gender == male} |
  {FirstPerson.gender == female}|
  INITIALS2
 )+
 ({Upper})
 ({Lookup.majorType == org_base}|
  {Lookup.majorType == govern_key}|
  {Lookup.majorType == org_key})
):spur
-->
:spur.TempOrganization = {rule = PersonOrgAmbig}

// "A" followed by a name-like sequence is marked as Spur ("A" excluded).
Rule: APersonFull
Priority: 20
(
 {Token.string == "A"}
)
(
 (PREFIX)*
 ({Upper}|INITIALS)+
 (PERSONENDING)?
)
:spur -->
 :spur.Spur = {rule = "APersonFull"}

Rule: PersonAlias
Priority: 10
// Masashi ``Jumbo'' Ozakim
(
 ((FIRSTNAME | FIRSTNAMEAMBIG)
 )+
 (QUOTE)
 ({Upper})
 (QUOTE)
 ({Upper})
 (PERSONENDING)?
)
:person -->
{
  gate.FeatureMap features = Factory.newFeatureMap();
  gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");

  //get all firstPerson annotations that have a gender feature
  HashSet fNames = new HashSet();
  fNames.add("gender");
  gate.AnnotationSet firstPerson = personSet.get("FirstPerson", fNames);
  if (firstPerson != null && firstPerson.size()>0)
  {
    // sort with OffsetComparator and take the gender from the first
    // element of the sorted list
    java.util.List fpList = new java.util.ArrayList(firstPerson);
    java.util.Collections.sort(fpList, new gate.util.OffsetComparator());
    gate.Annotation personAnn = (gate.Annotation)fpList.get(0);
    features.put("gender", personAnn.getFeatures().get("gender"));
  }
  features.put("kind", "personName");
  features.put("rule", "PersonAlias");
  annotations.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
                  features);
}

///////////////////////////////////////////////////////////////////
// Organisation Rules

Macro: CDG
// cdg is something like "Ltd."
(
 ({Lookup.majorType == cdg})|
 ({Token.string == ","}
  {Lookup.majorType == cdg})
)

// "St", "St." or "Saint".
Macro: SAINT
(
 ({Token.string == "St"} ({Token.string == "."})? |
  {Token.string == "Saint"})
)

// Church / Cathedral / Chapel, capitalised or lower case.
Macro: CHURCH
(
 {Token.string == "Church"}|{Token.string == "church"}|
 {Token.string == "Cathedral"}|{Token.string == "cathedral"}|
 {Token.string == "Chapel"}|{Token.string == "chapel"}
)

/////////////////////////////////////////////////////////////

// Gazetteer entries of major type "spur" become Spur annotations.
Rule: OrgSpur
Priority: 300
(
 {Lookup.majorType == spur}
):spur
-->
:spur.Spur = {rule = OrgSpur}

// Two or more groups of initials in a row are treated as an organization.
Rule: OrgAllInitials
Priority: 25
(
 (INITIALS) (INITIALS)+
):org
-->
:org.TempOrganization = {rule = OrgAllInitials}

// An organization taken directly from the gazetteer (leading determiner
// excluded from the span).
Rule: GazOrganization
Priority: 145
(
 {Token.category == DT}
)?
(
 {Lookup.majorType == organization}
)
:orgName -->
{
  gate.FeatureMap features = Factory.newFeatureMap();
  // create an annotation set consisting of all the annotations for org
  gate.AnnotationSet orgSet = (gate.AnnotationSet)bindings.get("orgName");
  // create an annotation set consisting of the annotation matching Lookup
  gate.AnnotationSet org = (gate.AnnotationSet)orgSet.get("Lookup");
  // if the annotation type Lookup doesn't exist, do nothing
  if (org != null && org.size()>0)
  {
    // if it does exist, take the first element in the set
    gate.Annotation orgAnn = (gate.Annotation)org.iterator().next();
    //propagate minorType feature (and value) from org
    features.put("orgType", orgAnn.getFeatures().get("minorType"));
  }
  // create some new features
  features.put("rule", "GazOrganization");
  // create a TempOrg annotation and add the features we've created
  annotations.add(orgSet.firstNode(), orgSet.lastNode(), "TempOrganization",
                  features);
}

Rule: LocOrganization
Priority: 50
// Ealing Police
(
 ({Lookup.majorType == location} |
  {Lookup.majorType == country_adj})
 {Lookup.majorType == organization}
 ({Lookup.majorType == organization})?
)
:orgName -->
 :orgName.TempOrganization = {kind = "orgName", rule=LocOrganization}

Rule: OrgXandY
Priority: 20
// Bradford & Bingley
// Bradford & Bingley Ltd
(
 {Token.category == IN}
)?
(
 ({Token.category == NNP}
 )+
 {Token.string == "&"}
 (
  {Token.orth == upperInitial}
 )+
 (CDG)?
)
:orgName -->
 :orgName.TempOrganization = {kind = "unknown", rule = "OrgXandY"}

Rule: OrgUni
Priority: 25
// University of Sheffield
// Sheffield University
// A Sheffield University
// (only the "University of X" form is matched by the pattern below)
(
 {Token.string == "University"}
 {Token.string == "of"}
 (
  {Token.category == NNP})+
)
:orgName -->
 :orgName.TempOrganization = {kind = "org", rule = "OrgUni"}

Rule: OrgDept
Priority: 25
// Department of Pure Mathematics and Physics
(
 {Token.string == "Department"}
 {Token.string == "of"}
 (
  {Token.orth == upperInitial})+
 (
  {Token.string == "and"}
  (
   {Token.orth == upperInitial})+
 )?
)
:orgName -->
 :orgName.TempOrganization = {kind = "department", rule = "OrgDept"}

Rule: OrgXKey
Priority: 125
// Aaaa Ltd.
// Xxx Services Ltd.
(
 {Token.category == DT}
)?
(
 ({Upper})
 ({Upper})?
 ({Upper})?
 ({Upper})?
 ({Upper})?
 {Lookup.majorType == org_key}
 ({Lookup.majorType == org_ending})?
)
:orgName -->
 :orgName.TempOrganization = {kind = "unknown", rule = "OrgXKey"}

Rule: NotOrgXEnding
Priority: 500
// Very Limited
(
 {Token.category == DT}
)?
(
 {Token.category == RB}
 {Lookup.majorType == cdg}
)
:label
-->
 :label.Spur = {rule = "NotOrgXEnding"}

Rule: OrgXEnding
Priority: 120
// Coca Cola Co.
(
 {Token.category == DT}
)?
(
 ({Upper})
 ({Upper})?
 {Lookup.majorType == cdg}
)
:orgName -->
 :orgName.TempOrganization = {kind = "unknown", rule = "OrgXEnding"}

Rule: OrgXandYKey
Priority: 120
// Aaaa Ltd.
// Xxx Services Ltd.
// AA and BB Services Ltd.
// but NOT A XXX Services Ltd.
(
 {Token.category == DT}
)?
(
 ({Upper})
 ({Upper})?
 (({Token.string == "and"} |
   {Token.string == "&"})
  ({Upper})?
  ({Upper})?
  ({Upper})?
 )
 {Lookup.majorType == org_key}
 ({Lookup.majorType == org_ending})?
)
:orgName -->
 :orgName.TempOrganization = {kind = "unknown", rule = "OrgXandYKey"}

Rule: OrgXsKeyBase
Priority: 120
// Gandy's Circus
// Queen's Ware
// NOTE(review): the "rule" value below is spelt "OrgXsKeybase" (lower-case
// b), unlike the rule name - left untouched in case something matches on it
(
 {Token.category == DT}|
 {Token.category == PRP}|
 {Token.category == RB}
)?
(
 ({Upper})?
 ({Upper})?
 ({Token.orth == upperInitial}
  {Token.string == "'"}
  ({Token.string == "s"})?
 )
 ({Lookup.majorType == org_key}|
  {Lookup.majorType == org_base})
)
:orgName -->
 :orgName.TempOrganization = {kind = "org", rule = "OrgXsKeybase"}

Rule: NotOrgXBase
Priority: 1000
// not things like British National
// or The University
(
 ({Token.category == DT}
 )?
)
(
 ({Lookup.majorType == country_adj}|
  {Token.orth == lowercase})
 ({Lookup.majorType == org_base}|
  {Lookup.majorType == govern_key})
)
:orgName -->
 :orgName.Temp = {kind = "notorgName", rule = "NotOrgXBase"}

Rule: OrgXBase
Priority: 130
// same as OrgXKey but uses base instead of key
// includes govern_key e.g. academy
// Barclays Bank
// Royal Academy of Art
(
 ({Token.category == DT}
 )?
)
(
 (
  {Upper}|
  {Lookup.majorType == organization}
 )
 ({Upper})?
 ({Upper})?
 ({Lookup.majorType == org_base}|
  {Lookup.majorType == govern_key}
 )
 (
  ({Token.string == "of"}|
   {Token.string == "for"})
  ({Token.category == DT})?
  ({Upper})
  ({Upper})?
  ({Upper})?
 )?
)
:orgName -->
 :orgName.TempOrganization = {kind = "unknown", rule = "OrgXBase"}

// Organization base/government key + "of" + one or two Upper annotations.
Rule: BaseofOrg
Priority: 130
(
 (DET)?
)
(
 ({Lookup.majorType == org_base}|
  {Lookup.majorType == govern_key}
 )
 {Token.string == "of"}
 ( {Token.category == DT}
 )?
 ({Upper})
 ({Upper})?
)
:orgName -->
 :orgName.TempOrganization = {kind = "unknown", rule = "BaseofOrg"}

Rule: OrgPreX
Priority: 130
// Royal Tuscan
(
 {Lookup.majorType == org_pre}
 (
  {Token.orth == upperInitial})+
 ({Lookup.majorType == org_ending})?
)
:orgName -->
 :orgName.TempOrganization = {kind = "unknown", rule = "OrgPreX"}

Rule: OrgChurch
Priority: 150
// St. Andrew's Church
(
 (SAINT)
 {Token.orth == upperInitial}
 {Token.string == "'"}({Token.string == "s"})?
 (CHURCH)
)
:orgName -->
 :orgName.TempOrganization = {kind = "org", rule = "OrgChurch"}

Rule: OrgPersonAmbig
Priority: 130
// Alexandra Pottery should be org not person
// overrides PersonFull
(
 (FIRSTNAME
 )
 ({Lookup.majorType == org_key}|
  {Lookup.majorType == org_base})
 ({Lookup.majorType == org_ending})?
)
:org -->
 :org.TempOrganization= {kind = "unknown", rule = "OrgPersonAmbig"}

/////////////////////////////////////////////////////////////////
// Location rules

Rule: Location1
Priority: 75
// Lookup = city, country, province, region, water
// Western Europe
// South China sea
(
 {Token.category == DT}
)?
(
 ({Lookup.majorType == loc_key, Lookup.minorType == pre}
 )?
 {Lookup.majorType == location}
 (
  {Lookup.majorType == loc_key, Lookup.minorType == post})?
)
:locName -->
{
  gate.FeatureMap features = Factory.newFeatureMap();
  // create an annotation set consisting of all the annotations for locName
  gate.AnnotationSet locSet = (gate.AnnotationSet)bindings.get("locName");
  // create an annotation set consisting of the annotation matching Lookup
  gate.AnnotationSet loc = (gate.AnnotationSet)locSet.get("Lookup");
  // if the annotation type Lookup doesn't exist, do nothing
  if (loc != null && loc.size()>0)
  {
    // if it does exist, take the first element in the set
    gate.Annotation locAnn = (gate.Annotation)loc.iterator().next();
    //propagate minorType feature (and value) from loc
    features.put("locType", locAnn.getFeatures().get("minorType"));
  }
  // create some new features
  features.put("rule", "Location1");
  // create a TempLoc annotation and add the features we've created
  annotations.add(locSet.firstNode(), locSet.lastNode(), "TempLocation",
                  features);
}

// A location taken directly from the gazetteer (leading determiner
// excluded from the span).
Rule: GazLocation
Priority: 70
(
 {Token.category == DT}
)?
(
 {Lookup.majorType == location}
)
:locName
-->
{
  gate.FeatureMap features = Factory.newFeatureMap();
  // create an annotation set consisting of all the annotations for locName
  gate.AnnotationSet locSet = (gate.AnnotationSet)bindings.get("locName");
  // create an annotation set consisting of the annotation matching Lookup
  gate.AnnotationSet loc = (gate.AnnotationSet)locSet.get("Lookup");
  // if the annotation type Lookup doesn't exist, do nothing
  if (loc != null && loc.size()>0)
  {
    // if it does exist, take the first element in the set
    gate.Annotation locAnn = (gate.Annotation)loc.iterator().next();
    //propagate minorType feature (and value) from loc
    features.put("locType", locAnn.getFeatures().get("minorType"));
  }
  // create some new features
  features.put("rule", "GazLocation");
  // create a TempLoc annotation and add the features we've created
  annotations.add(locSet.firstNode(), locSet.lastNode(), "TempLocation",
                  features);
}

// Proper noun followed by a post-positioned location key.
Rule: LocationPost
Priority: 50
(
 {Token.category == DT}
)?
(
 {Token.category == NNP}
 {Lookup.majorType == loc_key, Lookup.minorType == post}
)
:locName
-->
 :locName.TempLocation = {kind = "locName", rule = LocationPost}

// Location key + country adjective: matched with an empty action, so no
// annotation is created for this span by this rule.
Rule: LocKeyAdj
Priority: 100
(
 {Token.category == DT}
)?
(
 ({Lookup.majorType == loc_key, Lookup.minorType == pre}
 )
 ({Lookup.majorType == country_adj})
 (
  {Lookup.majorType == loc_key, Lookup.minorType == post})?
):spur
-->
{}

// Pre-positioned location key + Upper (+ optional post key).
// No Priority is declared for this rule.
Rule: LocKey
(
 {Token.category == DT}
)?
(
 ({Lookup.majorType == loc_key, Lookup.minorType == pre}
 )
 ({Upper})
 (
  {Lookup.majorType == loc_key, Lookup.minorType == post})?
)
:locName -->
 :locName.TempLocation = {kind = "locName", rule = LocKey}

/////////////////////////////////////////////////////////////////
// Context-based Rules

// "in" + all-caps gazetteer location; "in" is excluded from the span.
// No Priority is declared for this rule.
Rule: InLoc1
(
 {Token.string == "in"}
)
(
 {Lookup.majorType == location, Token.orth == allCaps}
)
:locName
-->
 :locName.TempLocation = {kind = "locName", rule = InLoc1}

// General location key + "of" + Upper; only the Upper part is annotated.
Rule: LocGeneralKey
Priority: 30
(
 {Lookup.majorType == loc_general_key}
 {Token.string == "of"}
)
(
 ({Upper})
)
:loc
-->
 :loc.TempLocation = {kind = "locName", rule = LocGeneralKey}

Rule: OrgContext1
Priority: 1
// company X
(
 {Token.string == "company"}
)
(
 ({Upper})
 ({Upper})?
 ({Upper})?
)
:org
-->
 :org.TempOrganization= {kind = "orgName", rule = "OrgContext1"}

Rule: OrgContext2
Priority: 5
// Telstar laboratory
// Medici offices
(
 ({Upper})
 ({Upper})?
 ({Upper})?
)
:org
(
 ({Token.string == "offices"} |
  {Token.string == "Offices"} |
  {Token.string == "laboratory"} |
  {Token.string == "Laboratory"} |
  {Token.string == "laboratories"} |
  {Token.string == "Laboratories"})
)
-->
 :org.TempOrganization= {kind = "orgName", rule = "OrgContext2"}

Rule: JoinOrg
Priority: 50
// Smith joined Energis
// NOTE(review): the "rule" value below is spelt "joinOrg" (lower-case j),
// unlike the rule name - left untouched in case something matches on it
(
 ({Token.string == "joined"}|
  {Token.string == "joining"}|
  {Token.string == "joins"}|
  {Token.string == "join"}
 )
)
(
 ({Upper})
 ({Upper})?
 ({Upper})?
)
:org
-->
 :org.TempOrganization= {kind = "orgName", rule = "joinOrg"}

////////////////////////////////////////////////////////////////////////
// Rules for Facilities
//
// NOTE(review): every Java action in this section ends with
// annotations.removeAll(facSet), which removes the annotations bound to
// :facName (presumably the matched Token/Lookup annotations) from
// `annotations` - confirm this is intended before relying on those
// annotations in later phases.

// A building taken directly from the gazetteer.
Rule: GazFacility
Priority: 50
(
 {Token.category == DT}
)?
(
 {Lookup.majorType == facility, Lookup.minorType == building}
):facName
-->
{
  gate.FeatureMap features = Factory.newFeatureMap();
  // create an annotation set consisting of all the annotations for facName
  gate.AnnotationSet facSet = (gate.AnnotationSet)bindings.get("facName");
  // create some new features
  features.put("rule", "GazFacility");
  // create a TempFac annotation and add the features we've created
  annotations.add(facSet.firstNode(), facSet.lastNode(), "TempFacility",
                  features);
  annotations.removeAll(facSet);
}

Rule: LocFacility
Priority: 75
// Berlin Wall
// Manchester Museum
// NOTE(review): GazLocation is not listed in this phase's Input
// declaration, so the {GazLocation} element presumably can never match -
// confirm whether {Lookup.majorType == location} was intended
(
 {Token.category == DT}
)?
(
 ({Lookup.majorType == loc_key, Lookup.minorType == pre})?
 ({GazLocation})+
 ({Lookup.majorType == facility_key})+
)
:facName
-->
{
  gate.FeatureMap features = Factory.newFeatureMap();
  // create an annotation set consisting of all the annotations for facName
  gate.AnnotationSet facSet = (gate.AnnotationSet)bindings.get("facName");
  // create some new features
  features.put("rule", "LocFacility");
  // create a TempFac annotation and add the features we've created
  annotations.add(facSet.firstNode(), facSet.lastNode(), "TempFacility",
                  features);
  annotations.removeAll(facSet);
}

Rule: Facility1
Priority: 50
// Stormont Castle
(
 {Token.category == DT}
)?
(
 ({Token.category == NNP})+
 ({Token.string == "'"}
  {Token.string == "s"})?
 ({Lookup.majorType == facility_key})+
)
:facName
-->
{
  gate.FeatureMap features = Factory.newFeatureMap();
  // create an annotation set consisting of all the annotations for facName
  gate.AnnotationSet facSet = (gate.AnnotationSet)bindings.get("facName");
  // create some new features
  features.put("rule", "Facility1");
  // create a TempFac annotation and add the features we've created
  annotations.add(facSet.firstNode(), facSet.lastNode(), "TempFacility",
                  features);
  annotations.removeAll(facSet);
}

Rule: FacilityExt
Priority: 30
// Schipol airport
// the facility key itself is matched as context and is not in the span
(
 {Token.category == DT}
)?
(
 ({Token.category == NNP})+
):facName
(
 {Lookup.majorType == facility_key_ext}
)
-->
{
  gate.FeatureMap features = Factory.newFeatureMap();
  // create an annotation set consisting of all the annotations for facName
  gate.AnnotationSet facSet = (gate.AnnotationSet)bindings.get("facName");
  // create some new features
  features.put("rule", "FacilityExt");
  // create a TempFac annotation and add the features we've created
  annotations.add(facSet.firstNode(), facSet.lastNode(), "TempFacility",
                  features);
  annotations.removeAll(facSet);
}

Rule: FacilityExt2
Priority: 30
//airbase of Aviano
// only the proper noun(s) after "of" are in the span
(
 ({Token.category == DT})?
 ({Lookup.majorType == facility_key}|
  {Lookup.majorType == facility_key_ext})
 {Token.string == "of"}
)
(
 {Token.category == NNP}
)+
:facName
-->
{
  gate.FeatureMap features = Factory.newFeatureMap();
  // create an annotation set consisting of all the annotations for facName
  gate.AnnotationSet facSet = (gate.AnnotationSet)bindings.get("facName");
  // create some new features
  features.put("rule", "FacilityExt2");
  // create a TempFac annotation and add the features we've created
  annotations.add(facSet.firstNode(), facSet.lastNode(), "TempFacility",
                  features);
  annotations.removeAll(facSet);
}

Rule: FacilityStreet
Priority: 30
// Downing Street
(
 ({Upper})+
 {Lookup.minorType == street}
):street
-->
:street.TempFacility = {rule = "FacilityStreet"}