001    package edu.upenn.cis.propbank_shen;
002    
003    import edu.upenn.cis.spinal.*;
004    //import edu.upenn.cis.treebank.TBNode;
005    
006    import java.util.*;
007    
008    /**
009       This class represents the span of a propbank argument or of the subtree
010       of a sentence
011       by a pair of integers indicating the first word of the 
012       argument and the first word that is outside the argument.
013       It corresponds to the class <code>TreeAddress</code> in the original 
014       Propbank API and had to be changed to reflect Libin Shen's change
015       to the original propbank as described in his REAMDE file:
016     
017       "Word IDs are used to represent the phrases, while in the original Propbank, 
018       phrases are represented with the root node of the subtree in PTB."   
019       
020       The numbers start with zero, i.e. the initial word in a sentence is
021       the zeroth word.
022     
023       @author Scott Cotton
024    */
025    public class WordSpan implements Comparable {
026    
027        /** the number of the initial word of this span
028         */
029        private int start;
030        /**
031           the number of the final word of this span
032         */
033        private int end;
034    
035        private WordSpan(){};
036    
037        /** 
038         * Construct a word span from a start and an end position
039         */
040        public WordSpan(int s, int e) {
041            start = s;
042            end = e;
043        }
044        
045        /**
046         * Merge two <code>WordSpan</code>s. Returns the smallest span that contains
047         * each of the input spans. Always returns a continuous span.
048         * If one of the arguments is null then the other one is returned.
049         * If both of the arguments are null then an exception is raised.
050         */
051        public static WordSpan combine(WordSpan w1, WordSpan w2) {
052            
053            if (w1 == null && w2 == null) 
054                throw new IllegalArgumentException();
055            
056            if (w1 == null) return w2;
057            if (w2 == null) return w1;
058            
059            WordSpan current=new WordSpan();
060            
061            if (w1.start() < w2.start()) {
062                current.start = w1.start();
063            } else {
064                current.start = w2.start();
065            }
066            if (w1.end() > w2.end()) {
067                current.end = w1.end();
068            } else {
069                current.end = w2.end();
070            }
071            return current;
072        }
073        
074        /**
075         * Merge any number of <code>WordSpan</code>s. Returns the smallest span that contains
076         * each of the input spans. Always returns a continuous span. Returns 
077         * an exception
078         * if the iterator argument is empty.
079         * @param wordSpans an <code>Iterator</code> containing spans
080         * @return a <code>WordSpan</code> that contains each of the input spans
081         */
082        public static WordSpan merge(Iterator wordSpans) {
083            if (!wordSpans.hasNext()) throw new IllegalArgumentException("Attempted" +
084                    " to merge zero WordSpans");
085            WordSpan current=null;
086            current = (WordSpan) wordSpans.next();
087            int min = current.start();
088            int max = current.end();
089            int currentMin, currentMax;
090            
091            while (wordSpans.hasNext()) {
092                current = (WordSpan) wordSpans.next();
093                currentMin = current.start();
094                currentMax = current.end();
095                if (currentMin < min) min = currentMin;
096                if (currentMax > max) max = currentMax;
097            }
098            return new WordSpan(min, max);
099        }
100    
101        /**
102         * Returns the node at this address, relative to root (which should
103         * probably be the actual root of a tree).
104         */
105    //    public TAGNode addressedNode (TAGNode root) {
106    //        TAGNode current = (TAGNode)root.getTerminals().get(start);
107    //        for (int i=0; i < end; i++) {
108    //            if (current == null)
109    //                throw new IllegalArgumentException("Terminal "+
110    //                      root.getTerminals().get(start) + " is only "+
111    //                      i + " deep, not "+end+".");
112    //            current = current.getParent();
113    //        }
114    //        return current;
115    //    }
116        
117    //    // TODO replace dummy method which is only there so that the code compiles
118    //    public TBNode addressedNode(TBNode root) {
119    //        throw new UnsupportedOperationException
120    //                ("dummy method which is only there so that the code compiles");
121    //    }
122        
123        
124        
125        
126        /** 
127         * Attempts to find a subtree whose root ElemTree yields the words
128         * described by this span. Returns null otherwise.
129         */
130        // TODO use span lookup table (subTreeForSpan) in s to do this more quickly
131        public ElemTree getSubTree(Sentence s) {
132            return s.getSubTree(this);
133        }
134        
135    
136        public boolean equals (Object o) {
137            if (!(o instanceof WordSpan))
138                return false;
139            WordSpan ta = (WordSpan)o;
140            return start() == ta.start() && end() == ta.end();
141        }
142    
143        // Need to override hashCode() because we're implementing a lookup table
144        // (a HashMap) for spans in class Sentence.java
145         public int hashCode () {
146             return this.toString().hashCode();
147         }
148    
149        /**
150           Comparison of node addresses just uses comparison
151           of start numbers backing off to end if
152           start numbers are equal.
153         */
154        public int compareTo(Object o) {
155            WordSpan oal = (WordSpan) o;
156            if (start() != oal.start()) {
157                return start() - oal.start();
158            } else {
159                return end() - oal.end();
160            }
161        }
162    
163        /**
164           Return a string of the form &lt;start&gt;_&lt;end&gt;
165         */
166        public String toString() {
167            return start() + "_" + end();
168        }
169    
170    
171    
172        /**
173         * 
174         * Creates a WordSpan instance from a string
175         * of the form <pre> &lt;terminal&gt;_&lt;end&gt; </pre>
176         * @param s
177         * 
178         */
179        public static WordSpan ofString(String s) throws CorruptDataException
180        {
181            // TODO replace this "magic symbol" by SEPARATOR field
182            int colidx = s.indexOf('_');
183            if (colidx == -1) {
184                throw new CorruptDataException("invalid basic argument location: " + s);
185            }
186            Integer t = Integer.decode(s.substring(0, colidx));
187            Integer h = Integer.decode(s.substring(colidx + 1, s.length()));
188            return new WordSpan(t.intValue(), h.intValue());
189        }
190    
191    
192    
193        public int start() {
194            return start;
195        }
196    
197        public int end() {
198            return end;
199        }
200    }