001 package edu.upenn.cis.propbank_shen; 002 003 import edu.upenn.cis.spinal.*; 004 //import edu.upenn.cis.treebank.TBNode; 005 006 import java.util.*; 007 008 /** 009 This class represents the span of a propbank argument or of the subtree 010 of a sentence 011 by a pair of integers indicating the first word of the 012 argument and the first word that is outside the argument. 013 It corresponds to the class <code>TreeAddress</code> in the original 014 Propbank API and had to be changed to reflect Libin Shen's change 015 to the original propbank as described in his REAMDE file: 016 017 "Word IDs are used to represent the phrases, while in the original Propbank, 018 phrases are represented with the root node of the subtree in PTB." 019 020 The numbers start with zero, i.e. the initial word in a sentence is 021 the zeroth word. 022 023 @author Scott Cotton 024 */ 025 public class WordSpan implements Comparable { 026 027 /** the number of the initial word of this span 028 */ 029 private int start; 030 /** 031 the number of the final word of this span 032 */ 033 private int end; 034 035 private WordSpan(){}; 036 037 /** 038 * Construct a word span from a start and an end position 039 */ 040 public WordSpan(int s, int e) { 041 start = s; 042 end = e; 043 } 044 045 /** 046 * Merge two <code>WordSpan</code>s. Returns the smallest span that contains 047 * each of the input spans. Always returns a continuous span. 048 * If one of the arguments is null then the other one is returned. 049 * If both of the arguments are null then an exception is raised. 050 */ 051 public static WordSpan combine(WordSpan w1, WordSpan w2) { 052 053 if (w1 == null && w2 == null) 054 throw new IllegalArgumentException(); 055 056 if (w1 == null) return w2; 057 if (w2 == null) return w1; 058 059 WordSpan current=new WordSpan(); 060 061 if (w1.start() < w2.start()) { 062 current.start = w1.start(); 063 } else { 064 current.start = w2.start(); 065 } 066 if (w1.end() > w2.end()) { 067 current.end = w1.end(); 068 } else { 069 current.end = w2.end(); 070 } 071 return current; 072 } 073 074 /** 075 * Merge any number of <code>WordSpan</code>s. Returns the smallest span that contains 076 * each of the input spans. Always returns a continuous span. Returns 077 * an exception 078 * if the iterator argument is empty. 079 * @param wordSpans an <code>Iterator</code> containing spans 080 * @return a <code>WordSpan</code> that contains each of the input spans 081 */ 082 public static WordSpan merge(Iterator wordSpans) { 083 if (!wordSpans.hasNext()) throw new IllegalArgumentException("Attempted" + 084 " to merge zero WordSpans"); 085 WordSpan current=null; 086 current = (WordSpan) wordSpans.next(); 087 int min = current.start(); 088 int max = current.end(); 089 int currentMin, currentMax; 090 091 while (wordSpans.hasNext()) { 092 current = (WordSpan) wordSpans.next(); 093 currentMin = current.start(); 094 currentMax = current.end(); 095 if (currentMin < min) min = currentMin; 096 if (currentMax > max) max = currentMax; 097 } 098 return new WordSpan(min, max); 099 } 100 101 /** 102 * Returns the node at this address, relative to root (which should 103 * probably be the actual root of a tree). 104 */ 105 // public TAGNode addressedNode (TAGNode root) { 106 // TAGNode current = (TAGNode)root.getTerminals().get(start); 107 // for (int i=0; i < end; i++) { 108 // if (current == null) 109 // throw new IllegalArgumentException("Terminal "+ 110 // root.getTerminals().get(start) + " is only "+ 111 // i + " deep, not "+end+"."); 112 // current = current.getParent(); 113 // } 114 // return current; 115 // } 116 117 // // TODO replace dummy method which is only there so that the code compiles 118 // public TBNode addressedNode(TBNode root) { 119 // throw new UnsupportedOperationException 120 // ("dummy method which is only there so that the code compiles"); 121 // } 122 123 124 125 126 /** 127 * Attempts to find a subtree whose root ElemTree yields the words 128 * described by this span. Returns null otherwise. 129 */ 130 // TODO use span lookup table (subTreeForSpan) in s to do this more quickly 131 public ElemTree getSubTree(Sentence s) { 132 return s.getSubTree(this); 133 } 134 135 136 public boolean equals (Object o) { 137 if (!(o instanceof WordSpan)) 138 return false; 139 WordSpan ta = (WordSpan)o; 140 return start() == ta.start() && end() == ta.end(); 141 } 142 143 // Need to override hashCode() because we're implementing a lookup table 144 // (a HashMap) for spans in class Sentence.java 145 public int hashCode () { 146 return this.toString().hashCode(); 147 } 148 149 /** 150 Comparison of node addresses just uses comparison 151 of start numbers backing off to end if 152 start numbers are equal. 153 */ 154 public int compareTo(Object o) { 155 WordSpan oal = (WordSpan) o; 156 if (start() != oal.start()) { 157 return start() - oal.start(); 158 } else { 159 return end() - oal.end(); 160 } 161 } 162 163 /** 164 Return a string of the form <start>_<end> 165 */ 166 public String toString() { 167 return start() + "_" + end(); 168 } 169 170 171 172 /** 173 * 174 * Creates a WordSpan instance from a string 175 * of the form <pre> <terminal>_<end> </pre> 176 * @param s 177 * 178 */ 179 public static WordSpan ofString(String s) throws CorruptDataException 180 { 181 // TODO replace this "magic symbol" by SEPARATOR field 182 int colidx = s.indexOf('_'); 183 if (colidx == -1) { 184 throw new CorruptDataException("invalid basic argument location: " + s); 185 } 186 Integer t = Integer.decode(s.substring(0, colidx)); 187 Integer h = Integer.decode(s.substring(colidx + 1, s.length())); 188 return new WordSpan(t.intValue(), h.intValue()); 189 } 190 191 192 193 public int start() { 194 return start; 195 } 196 197 public int end() { 198 return end; 199 } 200 }