001 package edu.upenn.cis.propbank_shen;
002
003 import edu.upenn.cis.spinal.*;
004 //import edu.upenn.cis.treebank.TBNode;
005
006 import java.util.*;
007
008 /**
009 This class represents the span of a propbank argument or of the subtree
010 of a sentence
011 by a pair of integers indicating the first word of the
012 argument and the first word that is outside the argument.
013 It corresponds to the class <code>TreeAddress</code> in the original
014 Propbank API and had to be changed to reflect Libin Shen's change
015 to the original propbank as described in his REAMDE file:
016
017 "Word IDs are used to represent the phrases, while in the original Propbank,
018 phrases are represented with the root node of the subtree in PTB."
019
020 The numbers start with zero, i.e. the initial word in a sentence is
021 the zeroth word.
022
023 @author Scott Cotton
024 */
025 public class WordSpan implements Comparable {
026
027 /** the number of the initial word of this span
028 */
029 private int start;
030 /**
031 the number of the final word of this span
032 */
033 private int end;
034
035 private WordSpan(){};
036
037 /**
038 * Construct a word span from a start and an end position
039 */
040 public WordSpan(int s, int e) {
041 start = s;
042 end = e;
043 }
044
045 /**
046 * Merge two <code>WordSpan</code>s. Returns the smallest span that contains
047 * each of the input spans. Always returns a continuous span.
048 * If one of the arguments is null then the other one is returned.
049 * If both of the arguments are null then an exception is raised.
050 */
051 public static WordSpan combine(WordSpan w1, WordSpan w2) {
052
053 if (w1 == null && w2 == null)
054 throw new IllegalArgumentException();
055
056 if (w1 == null) return w2;
057 if (w2 == null) return w1;
058
059 WordSpan current=new WordSpan();
060
061 if (w1.start() < w2.start()) {
062 current.start = w1.start();
063 } else {
064 current.start = w2.start();
065 }
066 if (w1.end() > w2.end()) {
067 current.end = w1.end();
068 } else {
069 current.end = w2.end();
070 }
071 return current;
072 }
073
074 /**
075 * Merge any number of <code>WordSpan</code>s. Returns the smallest span that contains
076 * each of the input spans. Always returns a continuous span. Returns
077 * an exception
078 * if the iterator argument is empty.
079 * @param wordSpans an <code>Iterator</code> containing spans
080 * @return a <code>WordSpan</code> that contains each of the input spans
081 */
082 public static WordSpan merge(Iterator wordSpans) {
083 if (!wordSpans.hasNext()) throw new IllegalArgumentException("Attempted" +
084 " to merge zero WordSpans");
085 WordSpan current=null;
086 current = (WordSpan) wordSpans.next();
087 int min = current.start();
088 int max = current.end();
089 int currentMin, currentMax;
090
091 while (wordSpans.hasNext()) {
092 current = (WordSpan) wordSpans.next();
093 currentMin = current.start();
094 currentMax = current.end();
095 if (currentMin < min) min = currentMin;
096 if (currentMax > max) max = currentMax;
097 }
098 return new WordSpan(min, max);
099 }
100
101 /**
102 * Returns the node at this address, relative to root (which should
103 * probably be the actual root of a tree).
104 */
105 // public TAGNode addressedNode (TAGNode root) {
106 // TAGNode current = (TAGNode)root.getTerminals().get(start);
107 // for (int i=0; i < end; i++) {
108 // if (current == null)
109 // throw new IllegalArgumentException("Terminal "+
110 // root.getTerminals().get(start) + " is only "+
111 // i + " deep, not "+end+".");
112 // current = current.getParent();
113 // }
114 // return current;
115 // }
116
117 // // TODO replace dummy method which is only there so that the code compiles
118 // public TBNode addressedNode(TBNode root) {
119 // throw new UnsupportedOperationException
120 // ("dummy method which is only there so that the code compiles");
121 // }
122
123
124
125
126 /**
127 * Attempts to find a subtree whose root ElemTree yields the words
128 * described by this span. Returns null otherwise.
129 */
130 // TODO use span lookup table (subTreeForSpan) in s to do this more quickly
131 public ElemTree getSubTree(Sentence s) {
132 return s.getSubTree(this);
133 }
134
135
136 public boolean equals (Object o) {
137 if (!(o instanceof WordSpan))
138 return false;
139 WordSpan ta = (WordSpan)o;
140 return start() == ta.start() && end() == ta.end();
141 }
142
143 // Need to override hashCode() because we're implementing a lookup table
144 // (a HashMap) for spans in class Sentence.java
145 public int hashCode () {
146 return this.toString().hashCode();
147 }
148
149 /**
150 Comparison of node addresses just uses comparison
151 of start numbers backing off to end if
152 start numbers are equal.
153 */
154 public int compareTo(Object o) {
155 WordSpan oal = (WordSpan) o;
156 if (start() != oal.start()) {
157 return start() - oal.start();
158 } else {
159 return end() - oal.end();
160 }
161 }
162
163 /**
164 Return a string of the form <start>_<end>
165 */
166 public String toString() {
167 return start() + "_" + end();
168 }
169
170
171
172 /**
173 *
174 * Creates a WordSpan instance from a string
175 * of the form <pre> <terminal>_<end> </pre>
176 * @param s
177 *
178 */
179 public static WordSpan ofString(String s) throws CorruptDataException
180 {
181 // TODO replace this "magic symbol" by SEPARATOR field
182 int colidx = s.indexOf('_');
183 if (colidx == -1) {
184 throw new CorruptDataException("invalid basic argument location: " + s);
185 }
186 Integer t = Integer.decode(s.substring(0, colidx));
187 Integer h = Integer.decode(s.substring(colidx + 1, s.length()));
188 return new WordSpan(t.intValue(), h.intValue());
189 }
190
191
192
193 public int start() {
194 return start;
195 }
196
197 public int end() {
198 return end;
199 }
200 }