/*
 *  org_context.jape
 *
 *  Copyright (c) 1998-2001, The University of Sheffield.
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Diana Maynard, 10 Sep 2001
 *
 *  $Id: org_context.jape,v 1.1 2004/09/10 13:33:24 akshay Exp $
 */

// Phase Org_Context: turns Unknown (kind == PN) annotations into Organization
// annotations when the surrounding context matches one of the patterns below.
//
// Conventions used by the rules in this file:
//   * the span to be re-annotated is bound to the label "org";
//   * the new Organization annotation carries the name of the rule that
//     created it under the feature key "rule" (no trailing space -- several
//     rules used to write "rule " which made the key inconsistent);
//   * after the new annotation is added, every annotation in the binding is
//     removed from the annotation set.
//
// NOTE(review): the SPACE macro used by several rules is not defined in this
// file; it is presumably provided by an earlier phase -- verify. Its
// annotation type may also need to be listed in Input for it to match.

Phase:   Org_Context
Input: Token Lookup Organization Unknown Location Break Initials Spur Control
Options: control = appelt


// Two or more single upper-case letters, each followed by a full stop,
// e.g. "A.B." or "A.B.C."
Macro: INITIALS
(
  ({Token.orth == upperInitial, Token.length =="1"} {Token.string == "."})
  ({Token.orth == upperInitial, Token.length =="1"} {Token.string == "."})+
)

// A single all-caps token of length 2 or 3
Macro: INITIALS2
(
  {Token.orth == allCaps, Token.length == "2"} |
  {Token.orth == allCaps, Token.length == "3"}
)


Rule: OrgContext1
Priority: 1
// company X
// company called X
(
  {Token.string == "company"}
  (
    (
      {Token.string == "called"} |
      {Token.string == "dubbed"} |
      {Token.string == "named"}
    )
  )?
)
(
  {Unknown.kind == PN}
)
:org
-->
{
  gate.AnnotationSet org = (gate.AnnotationSet) bindings.get("org");
  gate.FeatureMap features = Factory.newFeatureMap();
  features.put("rule", "OrgContext1");
  annotations.add(org.firstNode(), org.lastNode(), "Organization", features);
  // drop the Unknown annotation that has just been re-typed
  annotations.removeAll(org);
}


Rule: OrgContext2
Priority: 5
// Telstar laboratory
// Medici offices
(
  {Unknown.kind == PN}
):org
(
  (
    {Token.string == "offices"} |
    {Token.string == "Offices"} |
    {Token.string == "laboratory"} |
    {Token.string == "Laboratory"} |
    {Token.string == "laboratories"} |
    {Token.string == "Laboratories"}
  )
)
-->
{
  gate.AnnotationSet org = (gate.AnnotationSet) bindings.get("org");
  gate.FeatureMap features = Factory.newFeatureMap();
  features.put("rule", "OrgContext2");
  annotations.add(org.firstNode(), org.lastNode(), "Organization", features);
  annotations.removeAll(org);
}


Rule: OrgContext3
Priority: 5
// X shares
(
  {Unknown.kind == PN}
):org
(
  {Token.string == "shares"}
)
-->
{
  gate.AnnotationSet org = (gate.AnnotationSet) bindings.get("org");
  gate.FeatureMap features = Factory.newFeatureMap();
  features.put("rule", "OrgContext3");
  annotations.add(org.firstNode(), org.lastNode(), "Organization", features);
  annotations.removeAll(org);
}


Rule: OrgContext4
Priority: 10
// shares in X
// stake in X
(
  (
    {Token.string == "shares"} |
    {Token.string == "stake"}
  )
  {Token.string == "in"}
)
(
  {Unknown.kind == PN}
):org
-->
{
  gate.AnnotationSet org = (gate.AnnotationSet) bindings.get("org");
  gate.FeatureMap features = Factory.newFeatureMap();
  features.put("rule", "OrgContext4");
  annotations.add(org.firstNode(), org.lastNode(), "Organization", features);
  annotations.removeAll(org);
}


Rule: OrgContext5
Priority: 10
// officials at X
(
  (
    {Token.string == "officials"} |
    {Token.string == "Officials"}
  )
  {Token.string == "at"}
)
(
  {Unknown.kind == PN}
):org
-->
{
  gate.AnnotationSet org = (gate.AnnotationSet) bindings.get("org");
  gate.FeatureMap features = Factory.newFeatureMap();
  features.put("rule", "OrgContext5");
  annotations.add(org.firstNode(), org.lastNode(), "Organization", features);
  annotations.removeAll(org);
}


Rule: JoinOrg
Priority: 50
// Smith joined Energis
// later we should use the morph PR to prevent having to list morphological variants
(
  (
    {Token.string == "joined"} |
    {Token.string == "joining"} |
    {Token.string == "joins"} |
    {Token.string == "join"}
  )
  SPACE
)
(
  {Unknown.kind ==PN}
)
:org
-->
{
  gate.AnnotationSet org = (gate.AnnotationSet) bindings.get("org");
  gate.FeatureMap features = Factory.newFeatureMap();
  features.put("rule", "JoinOrg");
  annotations.add(org.firstNode(), org.lastNode(), "Organization", features);
  annotations.removeAll(org);
}


Rule: OrgPerson
Priority: 20
// Nokia Vice-President William Plummer
// NOTE(review): Person is not listed in this phase's Input declaration --
// confirm whether this rule can actually match.
(
  {Unknown.kind == PN}
):org
(
  (
    {Token.string == "'"}
    ({Token.string == "s"})?
  )?
  SPACE
  {Person.rule1 == PersonTitle}
)
-->
{
  // get the matched annotation(s)
  gate.AnnotationSet org = (gate.AnnotationSet) bindings.get("org");
  // create the new annotation
  gate.FeatureMap features = Factory.newFeatureMap();
  features.put("rule", "OrgPerson");
  annotations.add(org.firstNode(), org.lastNode(), "Organization", features);
  // delete the old annotation(s)
  annotations.removeAll(org);
}


Rule: OrgConjOrg1
Priority: 10
// X <conjunction> Organization
(
  {Unknown.kind == PN}
):org
(
  (SPACE)
  {Token.category == CC}
  (SPACE)
  {Organization}
)
-->
{
  gate.AnnotationSet org = (gate.AnnotationSet) bindings.get("org");
  gate.FeatureMap features = Factory.newFeatureMap();
  features.put("rule", "OrgConjOrg1");
  annotations.add(org.firstNode(), org.lastNode(), "Organization", features);
  annotations.removeAll(org);
}


Rule: OrgConjOrg2
Priority: 10
// Organization <conjunction> X
(
  {Organization}
  (SPACE)
  {Token.category == CC}
  (SPACE)
)
(
  {Unknown.kind == PN}
):org
-->
{
  gate.AnnotationSet org = (gate.AnnotationSet) bindings.get("org");
  gate.FeatureMap features = Factory.newFeatureMap();
  features.put("rule", "OrgConjOrg2");
  annotations.add(org.firstNode(), org.lastNode(), "Organization", features);
  annotations.removeAll(org);
}


Rule: OrgJobtitle
Priority: 30
// X followed by a job title from the gazetteer (Lookup.majorType == jobtitle)
(
  {Unknown.kind == PN}
):org
(
  SPACE
  {Lookup.majorType == jobtitle}
)
-->
{
  gate.AnnotationSet org = (gate.AnnotationSet) bindings.get("org");
  gate.FeatureMap features = Factory.newFeatureMap();
  // NOTE(review): the recorded value "OrgJobTitle" differs in case from the
  // rule name "OrgJobtitle"; kept as-is in case consumers filter on it.
  features.put("rule", "OrgJobTitle");
  annotations.add(org.firstNode(), org.lastNode(), "Organization", features);
  annotations.removeAll(org);
}


Rule: LocXandOrg
Priority: 25
// London Area and Terminal Control Centre
// NOTE(review): the whole sequence is bound to "org", so the Location, the
// conjunction Token and the existing Organization are all removed along with
// the Unknown annotation -- confirm that removing the Token is intended.
(
  {Location}
  {Unknown.kind == PN}
  {Token.category == CC}
  {Organization}
):org
-->
{
  // removes the matched annotations, adds a new Org annotation over the span
  gate.AnnotationSet org = (gate.AnnotationSet) bindings.get("org");
  gate.FeatureMap features = Factory.newFeatureMap();
  features.put("rule", "LocXandOrg");
  annotations.add(org.firstNode(), org.lastNode(), "Organization", features);
  annotations.removeAll(org);
}


Rule: LocOrg
Priority: 20
// guess that Unknown preceded by Loc is an Org
(
  {Location}
  {Unknown.kind == PN}
):org
-->
{
  // removes the Location and Unknown annotations, adds a new Org annotation
  gate.AnnotationSet org = (gate.AnnotationSet) bindings.get("org");
  gate.FeatureMap features = Factory.newFeatureMap();
  features.put("rule", "LocOrg");
  annotations.add(org.firstNode(), org.lastNode(), "Organization", features);
  annotations.removeAll(org);
}


Rule: OrgPublishers
Priority: 20
// published by X
// NOTE(review): unlike the OrgContext rules, the context tokens "published"
// and "by" are inside the "org" binding, so the new Organization covers them
// and their Token annotations are removed -- confirm this is intended.
(
  {Token.string == "published"}
  {Token.string == "by"}
  {Unknown.kind == PN}
):org
-->
{
  // removes the matched annotations, adds a new Org annotation over the span
  gate.AnnotationSet org = (gate.AnnotationSet) bindings.get("org");
  gate.FeatureMap features = Factory.newFeatureMap();
  features.put("rule", "OrgPublishers");
  annotations.add(org.firstNode(), org.lastNode(), "Organization", features);
  annotations.removeAll(org);
}


Rule: OrgGroup
Priority: 50
// Organization group
(
  {Organization}
  {Token.string == "group"}
):org
-->
{
  // removes old Org annotation, adds a new Org annotation that also covers
  // the trailing "group" token
  gate.AnnotationSet org = (gate.AnnotationSet) bindings.get("org");
  gate.FeatureMap features = Factory.newFeatureMap();
  features.put("rule", "OrgGroup");
  annotations.add(org.firstNode(), org.lastNode(), "Organization", features);
  annotations.removeAll(org);
}


// Existing Spur annotations are matched with an empty action: nothing is
// added or removed. Presumably this keeps OrgInitials from firing on them.
Rule: Spur
Priority: 500
(
  {Spur}
):spur
-->
{}


// Gazetteer entries with majorType == spur are marked with a Spur annotation.
Rule: NotOrgInitials
Priority: 500
(
  {Lookup.majorType == spur}
):spur
-->
:spur.Spur = {rule = "NotOrgInitials"}


// Existing Organization annotations are matched with an empty action and so
// are left untouched.
Rule: OrgDoNothing
Priority: 10000
(
  {Organization}
):org
-->
{}


Rule: OrgInitials
(
  {Initials} |
  INITIALS |
  INITIALS2
):org
-->
/* only fire if length is more than 3 */
{
  gate.AnnotationSet orgSet = (gate.AnnotationSet) bindings.get("org");

  // Measure the whole matched span. The binding may hold several Token
  // annotations (INITIALS), so taking the length of whichever annotation the
  // set iterator happens to return first would depend on iteration order and
  // would only ever see a single token.
  long start = orgSet.firstNode().getOffset().longValue();
  long end = orgSet.lastNode().getOffset().longValue();

  if (end - start > 3) {
    gate.FeatureMap features = Factory.newFeatureMap();
    features.put("rule", "OrgInitials");
    annotations.add(orgSet.firstNode(), orgSet.lastNode(), "Organization", features);
  }

  // NOTE(review): this removes every bound annotation whether or not an
  // Organization was created, including Token annotations matched through
  // INITIALS / INITIALS2 -- confirm this is intended.
  annotations.removeAll(orgSet);
}