001    /**
002     * LTAG-spinal API, an interface to the treebank format introduced by Libin Shen.
003     * Copyright (C) 2007  Lucas Champollion
004     *
005     * This program is free software: you can redistribute it and/or modify
006     * it under the terms of the GNU General Public License as published by
007     * the Free Software Foundation, either version 3 of the License, or
008     * (at your option) any later version.
009     * 
010     * This program is distributed in the hope that it will be useful,
011     * but WITHOUT ANY WARRANTY; without even the implied warranty of
012     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
013     * GNU General Public License for more details.
014     *
015     * You should have received a copy of the GNU General Public License
016     * along with this program.  If not, see <http://www.gnu.org/licenses/>.
017     *
018     */
019    package edu.upenn.cis.spinal;
020    
021    import java.io.*;
022    import java.util.*;
023    import java.util.regex.*;
024    import edu.upenn.cis.propbank_shen.*;
025    
026    /**
027     * Represents a sentence (an LTAG-spinal derivation tree) in Libin Shen's
028     * LTAG-spinal treebank.
029     *
030     * A typical sentence is represented in Libin Shen's thesis, page 73.
031     *
032     * @author Lucas Champollion
033     * @author Ryan Gabbard
034     */
035    public class Sentence implements Serializable {
036        
037        
038        /** 
039         * Some sentences in the LTAG-spinal Treebank only consist of the word "skip",
040         * i.e. they're not really there -- the word indicates that the corresponding
041         * sentence in the Penn Treebank has not been included in the LTAG-spinal
042         * treebank.
043         */
044        private boolean skip=false;
045        
046        /**
047         * The number of the Penn Treebank section from which this 
048         * <code>Sentence</code> has been taken, or -1 if not applicable.
049         */
050        private int sectionNumber = -1;
051        
052        /**
053         * The number of the Penn Treebank file from which this 
054         * <code>Sentence</code> has been taken, or -1 if not applicable.
055         */
056        private int fileNumber=-1;
057        
058        /**
059         * The number of the Penn Treebank section from which this 
060         * <code>Sentence</code> has been taken, or -1 if not applicable.
061         */
062        private int sentenceNumber=-1;
063        
064        /**
065         * The root of the sentence.
066         */
067        private ElemTree root;
068        
069        /**
070         * Contains all the elementary trees of this sentence in order.
071         */
072        private ArrayList elemTrees;
073        
074        /**
075         * A lexicon mapping word spans to the corresponding nodes.
076         */
077        private HashMap spanTable;
078        
079        /**
080         * First call to {@link #computeSpans()} sets this to true.
081         */
082        private boolean spansComputed=false;
083        
084        /**
085         * First call to {@link #computeSpanTable()} sets this to true.
086         */    
087        private boolean spanTableComputed=false;
088        
089        
090        /**
091         * Pattern used in method (@link loadFromStringRepresentation(String)}
092         */
093        private Pattern elemTreePattern = 
094                Pattern.compile("^[#|&]",Pattern.MULTILINE);
095        
096        // to be deleted if RelativeClauseFixer is
097        static int multirootedParses=0;
098        
099        /**
100         * Creates a new <code>Sentence</code> object from a string representation following 
101         * the format defined in Libin Shen's thesis.
102         * @param representation a <code>String</code> containing a specification of 
103         * a sentence in LTAG-spinal format
104         * @throws edu.upenn.cis.spinal.ElemTreeFormatException if an error occurs while parsing the string representation
105         */
106        public Sentence(String representation)
107        throws ElemTreeFormatException {
108            elemTrees=new ArrayList();
109            root=null;
110            sentenceNumber=-1;
111            
112            loadFromStringRepresentation(representation);
113        }
114        
115        /**
116         * Convenience method that calls the constructor, to follow the conventions in the Propbank API.
117         * @param representation a <code>String</code> containing a specification of
118         * a sentence in LTAG-spinal format
119         * @return a new {@link Sentence} constructed from the given <code>String</code>
120         * @throws edu.upenn.cis.spinal.ElemTreeFormatException if an error occurs while parsing the string representation
121         */
122        public Sentence ofString(String representation) throws ElemTreeFormatException {
123            return new Sentence(representation);
124        }
125        
126        /**
127         * Performs the actual parsing of an LTAG-spinal formatted string 
128         * into a <code>Sentence</code>.
129         * @param rep a <code>String</code> containing a specification of a sentence in LTAG-spinal format
130         * @throws edu.upenn.cis.spinal.ElemTreeFormatException if the input is not well-formed
131         */
132        void loadFromStringRepresentation(String rep)
133        throws ElemTreeFormatException {
134            
135            String[] lines=rep.split("\\n", 3);
136            // first line looks like:
137            // 0 1 0
138            // (for LTAG spinal treebank)
139            // or like:
140            // 1
141            // (for parser output)
142            
143            String[] locations = lines[0].split(" ");
144            
145            try {
146                if (locations.length == 1) { // typical of parser output
147                    sentenceNumber=Integer.parseInt(locations[0]);
148                } else if (locations.length == 3) { // typical of LTAG treebank
149                    sectionNumber=Integer.parseInt(locations[0]);
150                    fileNumber=Integer.parseInt(locations[1]);
151                    sentenceNumber=Integer.parseInt(locations[2]);
152                } else throw new ElemTreeFormatException
153                        ("Invalid sentence number" + locations);
154            } catch (NumberFormatException e) {
155                throw new ElemTreeFormatException("Invalid sentence number " +
156                        locations);
157            }
158            
159            // second line looks like:
160            // root 6
161            // or like:
162            // skip
163            
164            if (lines[1].equals("skip")) {
165                skip=true;
166                return;
167            }
168            
169            
170            int rootNumber=-1;
171            
172            try {
173                String[] rootParts=lines[1].split("\\s+");
174                
175                if (rootParts.length>2) {
176                    
177                    String location;
178                    
179                    if (sectionNumber != -1 && fileNumber != -1) {
180                        location = "section " + sectionNumber
181                            + ", file " +  fileNumber
182                            + ", sentence " + sentenceNumber;
183    
184                    } else {
185                        location = "sentence " + sentenceNumber;
186                    }
187                    
188                    System.err.println("WARNING: "
189                            + location
190                            + " has a multirooted parse (\""
191                            + lines[1] +
192                            "\"). This is not supported by the API and not " +
193                            "conform to LTAG-spinal standards, although " +
194                            "Shen's incremental parser sometimes produces this " +
195                            "output. " +
196                            "Only the first root has been read in.");
197                    multirootedParses++;
198                }
199                
200                if (rootParts.length<2) {
201                    System.err.println("WARNING: Bad root: "
202                            + "section " + sectionNumber
203                            + " file " +  fileNumber
204                            + " sentence " + sentenceNumber);
205                }
206                
207                rootNumber=
208                        Integer.parseInt(rootParts[1]);
209            } catch (NumberFormatException e) {
210                throw new ElemTreeFormatException("Invalid root number: " +
211                        lines[1].substring(5));
212            }
213            
214            // now parse the spinal elementary trees in the file
215            String[] elemTreeRepresentations=elemTreePattern.split(lines[2],0);
216            
217            for (int i=1; i<elemTreeRepresentations.length; i++) {
218                elemTrees.add(new ElemTree(this, elemTreeRepresentations[i]));
219            }
220            
221            root=(ElemTree)elemTrees.get(rootNumber);
222            
223            for (Iterator it=elemTrees.iterator(); it.hasNext(); ) {
224                ElemTree node=(ElemTree)it.next();
225                
226                node.complete();
227            }
228        }
229        
230        /**
231         * Reads a string representation of a derivation tree from the specified
232         * <code>BufferedReader</code>.
233         * @return a <code>Sentence</code> element representing the derivation tree, or null
234         * if the input contained nothing or contained only whitespace
235         * @param inp the <code>BufferedReader</code> from which to read
236         * @throws edu.upenn.cis.spinal.ElemTreeFormatException if an error occurs while parsing the string representation
237         * @throws java.io.IOException if an error occurs while reading
238         */
239        public static Sentence readTree(BufferedReader inp)
240        throws ElemTreeFormatException, IOException {
241            StringBuffer s=new StringBuffer("");
242            String in=null;
243            
244            while (true) {
245                in=inp.readLine();
246                
247                if ((in==null) || in.matches("^\\s*$")) { // null or whitespace
248                    break;
249                } else {
250                    s.append(in);
251                    s.append("\n"); 
252                }
253            }
254            
255            if (s.length()==0) {
256                return null;
257            } else {
258                return new Sentence(s.toString());
259            }
260        }
261        
262        /**
263         * Prints this sentence to the specified output in LTAG-spinal format.
264         * @param w the {@link java.io.Writer} to which this sentence is to be written
265         * @throws java.io.IOException if an error occurs during writing
266         */
267        public void writeTo(Writer w) throws IOException {
268            this.writeTo(new BufferedWriter(w));
269        }
270    
271        /**
272         * Prints this sentence to the specified output in LTAG-spinal format.
273         * @param b the {@link java.io.BufferedWriter} to which this sentence is to be written
274         * @throws java.io.IOException if an error occurs during writing
275         */
276        public void writeTo(BufferedWriter b) throws IOException {
277            b.write(this.getLocation());
278            b.newLine();
279            if (this.isSkipped()) {
280                b.write("skip");
281                b.newLine();
282                b.newLine();
283                
284                b.flush();
285                return;
286            }
287            
288            b.write("root " + this.getRoot().getNumber());
289            b.newLine();
290            
291            Iterator theElemTrees = this.elemTreesIterator();
292            while (theElemTrees.hasNext()) {
293                ElemTree current = (ElemTree) theElemTrees.next();
294                b.write(current.toString());
295                
296            }
297            b.newLine();
298            b.flush();
299            
300        }
301    
302        
303        /**
304         * Writes a visual representation of a subpart of this sentence in Graphviz format 
305         * to the specified {@link java.io.BufferedWriter}.
306         * 
307         * 
308         * @param b the {@link java.io.BufferedWriter} to which the subsentence is to be written
309         * @param start the first word of the sentence to be included in the graphical output
310         * @param end the last word of the sentence to be included in the graphical output
311         * @param includeSpans if true, the word span of the subtree dominated by a node
312         * is appended to that node's representation; otherwise, it only
313         * consists of the node label
314         * @param beanPoleStyle chooses between two very different styles of output -- if true,
315         * the output looks like beanpoles, if false, it looks like tadpoles. See the 
316         * illustrations on the <a href="http://www.cis.upenn.edu/%7Extag/spinal">LTAG-spinal website</a>.
317         * @param showSpines if true, shows the internal structure of the elementary trees; 
318         * otherwise, shows each elementary tree as one single node
319         * @throws java.io.IOException if an error occurs during writing
320         */
321        public void writeGraphvizTo(BufferedWriter b, int start, int end, 
322                boolean includeSpans, boolean beanPoleStyle,boolean showSpines) throws IOException {
323            b.write("digraph {");
324            b.newLine();
325            
326            // write out location
327            b.write("node_location[label=\""
328                    + this.prettyPrintLocation()
329                    + "\" shape=box];");
330            b.newLine();
331            
332            
333            if (this.isSkipped()) {
334                b.write("skip;");
335            } else {
336                
337                if (showSpines && !this.isBidirectionalParserOutput()) {
338                    // Create cluster with all the terminal nodes
339                    // in order to line them up horizontally.
340                    // We only do this if we actually have spines to show (i.e.
341                    // not if we have bidirectional parser output) and if we are 
342                    // asked to show the spines (i.e. showSpines == true) because otherwise
343                    // the sentence becomes unreadable.
344                    b.write("subgraph { rank = same; edge [style=invis] ");
345                    Iterator trees;
346                    if (start==-1 || end==-1) {
347                        trees = this.getElemTrees().listIterator();
348                    } else {
349                        trees = this.getElemTrees(start,end).listIterator();
350                    }
351                    String previousID = null, currentID = null;
352                    while (trees.hasNext()) {
353                        ElemTree current = (ElemTree) trees.next();
354                        previousID = currentID;
355                        currentID = current.getGraphvizNodeID();
356                        if (previousID != null) {
357                            b.write(" -> ");
358                        }
359                        b.write(currentID);
360                    }
361                    b.write("; }");
362                    b.newLine();
363                }            
364                
365                // try to find appropriate subtree
366                ElemTree subTree = null;
367                if ((start >=0) && (end >= start)) { // if we aren't supposed to output the whole tree
368                    subTree = this.getSubTree(start,end); 
369                    // may return null if can't find anything
370                }        
371                
372                if (subTree == null) { // we didn't find anything, or we're working on the whole tree
373                    subTree = this.getRoot();
374                }
375                
376                
377                subTree.writeGraphvizTo(b, start, end, includeSpans, beanPoleStyle, showSpines);
378            }
379            
380            b.newLine();
381            b.write("}");
382            b.flush();
383            return;
384            
385        }
386        
387        /**
388         * Writes a visual representation of this sentence in Graphviz format 
389         * to the specified {@link java.io.Writer}.
390         * 
391         * @param w the {@link java.io.Writer} to which this sentence is to be written
392         * @param includeSpans if true, the word span of the subtree dominated by a node
393         * is appended to that node's representation; otherwise, it only
394         * consists of the node label
395         * @param beanPoleStyle chooses between two very different styles of output -- if true,
396         * the output looks like beanpoles, if false, it looks like tadpoles. See the 
397         * illustrations on the <a href="http://www.cis.upenn.edu/%7Extag/spinal">LTAG-spinal website</a>.
398         * @param showSpines if true, shows the internal structure of the elementary trees; 
399         * otherwise, shows each elementary tree as one single node
400         * @throws java.io.IOException if an error occurs while writing
401         */
402        public void writeGraphvizTo(Writer w, boolean includeSpans, boolean beanPoleStyle,boolean showSpines) throws IOException {
403            this.writeGraphvizTo(new BufferedWriter(w), -1, -1, includeSpans, beanPoleStyle, showSpines);
404        }
405        
406        /**
407         * Writes a visual representation of a subpart of this sentence in Graphviz format 
408         * to the specified {@link java.io.Writer}.
409         * 
410         * 
411         * @param w the {@link java.io.Writer} to which the subsentence is to be written
412         * @param start the first word of the sentence to be included in the graphical output
413         * @param end the last word of the sentence to be included in the graphical output
414         * @param includeSpans if true, the word span of the subtree dominated by a node
415         * is appended to that node's representation; otherwise, it only
416         * consists of the node label
417         * @param beanPoleStyle chooses between two very different styles of output -- if true,
418         * the output looks like beanpoles, if false, it looks like tadpoles. See the 
419         * illustrations on the <a href="http://www.cis.upenn.edu/%7Extag/spinal">LTAG-spinal website</a>.
420         * @param showSpines if true, shows the internal structure of the elementary trees; 
421         * otherwise, shows each elementary tree as one single node
422         * @throws java.io.IOException if an error occurs while writing
423         */
424        public void writeGraphvizTo(Writer w, int start, int end, boolean includeSpans, boolean beanPoleStyle,boolean showSpines) throws IOException {
425            this.writeGraphvizTo(new BufferedWriter(w), start, end, includeSpans, beanPoleStyle, showSpines);
426        }
427    
428        /**
429         * Writes a visual representation of this sentence in Graphviz format 
430         * to the specified {@link java.io.BufferedWriter}.
431         * 
432         * 
433         * @param b the {@link java.io.BufferedWriter} to which this sentence is to be written
434         * @param includeSpans if true, the word span of the subtree dominated by a node
435         * is appended to that node's representation; otherwise, it only
436         * consists of the node label
437         * @param beanPoleStyle chooses between two very different styles of output -- if true,
438         * the output looks like beanpoles, if false, it looks like tadpoles. See the 
439         * illustrations on the <a href="http://www.cis.upenn.edu/%7Extag/spinal">LTAG-spinal website</a>.
440         * @param showSpines if true, shows the internal structure of the elementary trees; 
441         * otherwise, shows each elementary tree as one single node
442         * @throws java.io.IOException if an error occurs while writing
443         */
444        public void writeGraphvizTo(BufferedWriter b, boolean includeSpans, boolean beanPoleStyle,boolean showSpines) throws IOException {
445            this.writeGraphvizTo(b, -1, -1, includeSpans, beanPoleStyle, showSpines);
446        }
447        
448        /**
449         * Returns a string representation of this sentence in LTAG-spinal format.
450         *
451         * @return a string representing this sentence
452         */
453        public String toString() {
454            
455            StringWriter sw = new StringWriter(100);
456            try {
457                this.writeTo(new BufferedWriter(sw));
458            } catch (IOException ex) {
459                // can't occur with a StringWriter
460                ex.printStackTrace();
461            }
462            return sw.toString();
463        }
464        
465        
466        /**
467         * Returns a visual representation of this sentence in Graphviz format.
468         * @param includeSpans if true, the word span of the subtree dominated by a node
469         * is appended to that node's representation; otherwise, it only
470         * consists of the node label
471         * @param beanPoleStyle chooses between two very different styles of output -- if true,
472         * the output looks like beanpoles, if false, it looks like tadpoles. See the 
473         * illustrations on the <a href="http://www.cis.upenn.edu/%7Extag/spinal">LTAG-spinal website</a>.
474         * @param showSpines if true, shows the internal structure of the elementary trees; 
475         * otherwise, shows each elementary tree as one single node
476         * @return a <code>String</code> containing Graphviz format
477         */
478        public String toGraphviz(boolean includeSpans, boolean beanPoleStyle,boolean showSpines) {
479            return this.toGraphviz(-1,-1, includeSpans, beanPoleStyle, showSpines);
480        }
481        
482        
483        
484        /**
485         * Returns a visual representation of a subpart of this sentence in Graphviz format.
486         * 
487         * 
488         * @param start the first word of the sentence to be included in the graphical output
489         * @param end the last word of the sentence to be included in the graphical output
490         * @param includeSpans if true, the word span of the subtree dominated by a node
491         * is appended to that node's representation; otherwise, it only
492         * consists of the node label
493         * @param beanPoleStyle chooses between two very different styles of output -- if true,
494         * the output looks like beanpoles, if false, it looks like tadpoles. See the 
495         * illustrations on the <a href="http://www.cis.upenn.edu/%7Extag/spinal">LTAG-spinal website</a>.
496         * @param showSpines if true, shows the internal structure of the elementary trees; 
497         * otherwise, shows each elementary tree as one single node
498         *
499         * @return a <code>String</code> containing Graphviz format
500         */
501        public String toGraphviz(int start, int end, boolean includeSpans, boolean beanPoleStyle,boolean showSpines) {
502            StringWriter sw = new StringWriter(100);
503            try {
504                this.writeGraphvizTo(new BufferedWriter(sw), start, end, includeSpans, beanPoleStyle, showSpines);
505            } catch (IOException ex) {
506                // can't occur with a StringWriter
507                ex.printStackTrace();
508            }
509            return sw.toString();
510            
511        }
512        
513        /**
514         * Returns a <code>String</code> representing the location of the current sentence
515         * -- i.e. either a triple of section, file, and sentence number as
516         * in the LTAG-spinal treebank (following the Penn Treebank conventions),
517         * or simply a sentence number if the sentence is not from the 
518         * LTAG-spinal treebank.
519         * @return three numbers indicating where this sentence is found in
520         * the input
521         */
522        
523        public String getLocation() {
524            String result = "";
525            if (sectionNumber != -1 && fileNumber != -1) {
526                result += sectionNumber + " " + fileNumber + " ";
527            }
528            return result + sentenceNumber;
529        }
530        
531        /**
532         * Returns a human-readable string representing the location of the current sentence.
533         * If the sentence is taken from the LTAG-spinal treebank, the string looks 
534         * as follows:
535         * <pre>
536         * Section: X File: Y Sentence: Z
537         * </pre>
538         * Otherwise,
539         * if the sentence only has a sentence number, the string looks like
540         * <pre>
541         * Sentence: Z
542         * </pre>
543         * @return a human-readable string indicating where this sentence is found in
544         * the input
545         */
546        public String prettyPrintLocation() {
547            String result = "";
548            if (sectionNumber != -1 && fileNumber != -1) {
549                result += "Section: " + sectionNumber
550                        + "  File: " + fileNumber + "  ";
551            }
552            return result + "Sentence: " + sentenceNumber;
553        }
554        
555        /**
556         * Returns the unique <code>ElemTree</code> that is the root of a subtree whose
557         * yield is the specified word span, or null if there is no such
558         * tree.
559         * 
560         * @param w the span from the first up to and including the last word
561         * @throws edu.upenn.cis.spinal.SkippedSentenceException if the current sentence 
562         * is a skipped sentence in the LTAG-spinal treebank
563         * @return an <code>ElemTree</code> or null
564         */
565        public ElemTree getSubTree(WordSpan w) {
566            return this.getSubTree(w.start(), w.end());
567        }
568        
569        
570        
571        /**
572         * Returns the unique <code>ElemTree</code> that is the root of a subtree whose
573         * yield is the specified word span, or null if there is no such
574         * tree.
575         * 
576         * @param start the first (leftmost) word included in the span
577         * @param end the last (rightmost) word included in the span
578         * @throws edu.upenn.cis.spinal.SkippedSentenceException if the current sentence 
579         * is a skipped sentence in the LTAG-spinal treebank
580         * @return an <code>ElemTree</code> or null
581         */
582        public ElemTree getSubTree(int start, int end) {
583            
584            if (this.isSkipped()) {
585                throw new SkippedSentenceException(this);
586            }
587            
588            if (this.length()-1 < end) {
589                throw new IllegalArgumentException("Attempted to retrieve a subtree from" +
590                        "a WordSpan that spans outside of the sentence");
591            }
592            
593            // TODO add warnings if start > end, start < 0, end < 0, etc. here and elsewhere
594            
595            List candidates = this.getElemTrees(start, end);
596            Iterator iter = candidates.iterator();
597            ElemTree current;
598            WordSpan span;
599            while (iter.hasNext()) {
600                current = (ElemTree) iter.next();
601                span = current.getSpan();
602                if (start == span.start() && end == span.end()) {
603                    return current;
604                }
605            }
606            // if we haven't found anything...
607            return null;
608        }
609        
610        /**
611         * Returns true if this <code>Sentence</code> has been read in from the 
612         * format used in the output of Shen's bidirectional parser. If this is the case,
613         * no information about the spine is present. This is implemented as a simple
614         * lookup of the corresponding property of the <code>ElemTree</code> at
615         * the root of this sentence.
616         * @return a boolean value
617         * @see ElemTree#isBidirectionalParserOutput()
618         */
619        public boolean isBidirectionalParserOutput() {
620            return this.getRoot().isBidirectionalParserOutput();
621        }
622    
623        
624        /**
625         * Returns true iff the annotation for this sentence only consists of the word "skip",
626         * indicating that it is contained in the Penn Treebank but
627         * not in the LTAG-spinal treebank.
628         * @return true iff this sentence is skipped in the LTAG-spinal treebank
629         */
630        public boolean isSkipped() {
631            return this.skip;
632        }
633        
634        /**
635         * Returns the number of the current sentence in the Penn Treebank file or parser output.
636         * @return the sentence number
637         */
638        public int getSentenceNumber() {
639            return sentenceNumber;
640        }
641        
642        /**
643         * Returns the number of the Penn Treebank section in which the current sentence
644         * occurred, or -1 if the sentence is not a Penn Treebank sentence.
645         * @return the section number, or -1 if there is no such number
646         */
647        public int getSectionNumber() {
648            return sectionNumber;
649        }
650        
651        /**
652         * Returns the number of the Penn Treebank file in which the current sentence
653         * occurred, or -1 if the sentence is not a Penn Treebank sentence.
654         * @return the file number, or -1 if there is no such number
655         */
656        public int getFileNumber() {
657            return fileNumber;
658        }
659        
660        
661        /**
662         * Force the spans to be computed recursively on every <code>ElemTree</code>.
663         * @throws edu.upenn.cis.spinal.SkippedSentenceException if the current sentence 
664         * is a skipped sentence in the LTAG-spinal treebank
665         */
666        private void computeSpans() {
667            if (this.isSkipped()) {
668                throw new SkippedSentenceException(this);
669            }
670    
671            this.getRoot().computeSpan();
672            spansComputed = true;
673        }
674        
675        /**
676         * Returns the <code>ElemTree</code> whose yield is the given word span, 
677         * or null if there isn't one.
678         * @param w the <code>WordSpan</code> for which the dominating tree 
679         * is to be returned
680         * @return an <code>ElemTree</code> or null
681         * @throws edu.upenn.cis.spinal.SkippedSentenceException if the current sentence 
682         * is a skipped sentence in the LTAG-spinal treebank
683         */
684        public ElemTree subTreeForSpan(WordSpan w) {
685            if (this.isSkipped()) {
686                throw new SkippedSentenceException(this);
687            }
688    
689            if (!spansComputed) computeSpans();
690            if (!spanTableComputed) computeSpanTable();
691            if (spanTable.containsKey(w)) {
692                return (ElemTree) spanTable.get(w);
693            } else {
694                return null;
695            }
696        }
697        
698        /**
699         * Computes the span table, a directory whose keys are word spans and whose
700         * values are the corresponding subtrees (if any).
701         * @throws edu.upenn.cis.spinal.SkippedSentenceException if the current sentence 
702         * is a skipped sentence in the LTAG-spinal treebank
703         */
704        public void computeSpanTable() {
705            if (this.isSkipped()) {
706                throw new SkippedSentenceException(this);
707            }
708    
709            if (!spansComputed) computeSpans();
710            Iterator iter = this.elemTreesIterator();
711            spanTable = new HashMap(this.length());
712            ElemTree current;
713            String bugs="/Users/lingrad2/srl/ltagtb/bugs/";
714            while (iter.hasNext()) {
715    
716                current = (ElemTree) iter.next();
717                
718                
719                
720    // The following code was used to retrieve some buggy sentences.
721    // "There are at most 28 sentences in the corpus like that. So we can
722    //ignore them for now, or fix them by hand when we have the chance
723    //(which would lead us to the question of whether we should put the
724    //treebank under version control). I'm attaching a zipfile with the
725    //sentences in question. These are .dot files that you can open in
726    //graphviz or convert using "dot -Tjpg -o output.jpg filename.dot".
727    //Actually not all of them have the bug. The common property of the 28
728    //sentences is that in each of them there are at least two subtrees of
729    //the derivation tree with identical yield. See the .info files." 
730    // (Mail by Lucas Champollion to Joshi and Libin Nov 29 2006)
731    
732    
733                
734                
735    //            String lws = this.getLocation().replaceAll(" ", "_");
736    //            // location with underscores
737    //            if (spanTable.containsKey(current.getSpan())) {
738    //                try {
739    //                    
740    //                    BufferedWriter f = new BufferedWriter
741    //                            (new FileWriter(bugs+lws+".dot"));
742    //                    this.writeGraphvizTo(f);
743    //                    f.close();
744    //                } catch (IOException ex) {
745    //                    ex.printStackTrace();
746    //                }
747    //                BufferedWriter f;
748    //                try {
749    //                    f = new BufferedWriter(new FileWriter(bugs+lws + ".info"));
750    //                    f.write("location: " + this.getLocation());
751    //                    f.newLine();
752    //                    f.write("span: " + current.getSpan());
753    //                    f.newLine();
754    //                    f.write("current: " + current);
755    //                    f.newLine();
756    //                    f.write("stored: " + (ElemTree) spanTable.get(current.getSpan()));
757    //                    f.newLine();
758    //                    f.close();
759    //                } catch (IOException ex) {
760    //                    ex.printStackTrace();
761    //                }
762    //                
763    //            }
764                spanTable.put(current.getSpan(), current);
765            }
766            spanTableComputed=true;
767        }
768        
769        /**
770         * Returns true iff at least one of the elementary trees in this 
771         * <code>Sentence</code> is an initial tree.
772         * @throws edu.upenn.cis.spinal.SkippedSentenceException if the current sentence 
773         * is a skipped sentence in the LTAG-spinal treebank
774         * @return true iff there is at least one attachment operation in the 
775         * present tree
776         */
777        public boolean containsAttachment() {
778            if (this.isSkipped()) {
779                throw new SkippedSentenceException(this);
780            }
781            Iterator iter = this.elemTreesIterator();
782            
783            boolean result = false;
784            
785            while (iter.hasNext()) {
786                ElemTree current = (ElemTree) iter.next();
787                if (current.isInitial()) result = true;
788            }
789            return result;
790        }
791        
792        /**
793         * Returns true iff at least one of the elementary trees in this 
794         * <code>Sentence</code> is an auxiliary tree.
795         * @throws edu.upenn.cis.spinal.SkippedSentenceException if the current sentence 
796         * is a skipped sentence in the LTAG-spinal treebank
797         * @return true iff there is at least one adjunction operation in the 
798         * present tree
799         */
800        public boolean containsAdjunction() {
801            if (this.isSkipped()) {
802                throw new SkippedSentenceException(this);
803            }
804            Iterator iter = this.elemTreesIterator();
805            
806            boolean result = false;
807            
808            while (iter.hasNext()) {
809                ElemTree current = (ElemTree) iter.next();
810                if (current.isAuxiliary()) result = true;
811            }
812            return result;
813        }
814    
815        /**
816         * Returns true iff at least one of the elementary trees in this 
817         * <code>Sentence</code> is a conjunction tree.
818         * @throws edu.upenn.cis.spinal.SkippedSentenceException if the current sentence 
819         * is a skipped sentence in the LTAG-spinal treebank
820         * @return true iff there is at least one coordination operation in the 
821         * present tree
822         */
823        public boolean containsCoordination() {
824            if (this.isSkipped()) {
825                throw new SkippedSentenceException(this);
826            }
827            Iterator iter = this.elemTreesIterator();
828            
829            boolean result = false;
830            
831            while (iter.hasNext()) {
832                ElemTree current = (ElemTree) iter.next();
833                if (current.isCoord()) result = true;
834            }
835            return result;
836        }
837        
838    //    public void computeExtendedSpanTable() {
839    //        computeSpanTable();
840    //        
841    //        // extended span table is in the same style but trees occur
842    //        // under multiple entries: a tree may be pruned by removing any
843    //        // number of children
844    //        throw new RuntimeException("Not implemented yet");
845    //    }
846        
847        
848        /**
849         * Returns the length of this <code>Sentence</code>, that is, the number of elementary
850         * trees in this derivation tree.
851         * @throws edu.upenn.cis.spinal.SkippedSentenceException if the current sentence 
852         * is a skipped sentence in the LTAG-spinal treebank
853         * @return the number of elementary trees in this <code>Sentence</code>
854         */
855        public int length() {
856            if (this.isSkipped()) {
857                throw new SkippedSentenceException(this);
858            }
859    
860            return elemTrees.size();
861        }
862        
863        /**
864         * Returns the elementary tree at the root of this <code>Sentence</code>.
865         * 
866         * @throws edu.upenn.cis.spinal.SkippedSentenceException if the current sentence 
867         * is a skipped sentence in the LTAG-spinal treebank
868         * @return the <code>ElemTree</code> in which this derivation tree is rooted
869         */
870        public ElemTree getRoot() {
871            if (this.isSkipped()) {
872                throw new SkippedSentenceException(this);
873            }
874    
875            return root;
876        }
877        
878        /**
879         * Iterates over the elementary trees of which this <code>Sentence</code> 
880         * consists, in the order in which they are numbered (left to right in the 
881         * sentence). 
882         * @throws edu.upenn.cis.spinal.SkippedSentenceException if the current sentence 
883         * is a skipped sentence in the LTAG-spinal treebank
884         * @return a <code>ListIterator</code> 
885         */
886        public ListIterator elemTreesIterator() {
887            if (this.isSkipped()) {
888                throw new SkippedSentenceException(this);
889            }
890    
891            return elemTrees.listIterator();
892        }
893        
894        /**
895         * Returns the <code>ElemTree</code> associated with the <code>n</code>th word of
896         * the sentence.
897         * @param n a number between 0 and the length of the sentence
898         * @throws    IndexOutOfBoundsException if index is out of range <tt>(index
899         *            &lt; 0 || index &gt;= size())</tt>.
900         * @return an <code>ElemTree</code> for the <code>n</code>th word of the sentence
901         */
902        public ElemTree getElemTree(int n) {
903            if (this.isSkipped()) {
904                throw new SkippedSentenceException(this);
905            }
906    
907            return (ElemTree)elemTrees.get(n);
908        }
909        
910        /**
911         * Returns a <code>List</code> of <code>ElemTree</code>s for 
912         * the given word span.
913         * @return an ordered list containing some of the elementary trees of which 
914         * this sentence consists
915         * @throws edu.upenn.cis.spinal.SkippedSentenceException if the current sentence 
916         * is a skipped sentence in the LTAG-spinal treebank
917         */
918        public List getElemTrees() {
919            if (this.isSkipped()) {
920                throw new SkippedSentenceException(this);
921            }
922    
923            return this.elemTrees;
924        }
925        
926        /**
927         * Returns a <code>List</code> of <code>ElemTree</code>s for 
928         * the given word span.
929         * @param from the first word to be included in the list
930         * @param to the last word to be included in the list
931         * @return an ordered list containing some of the elementary trees of which 
932         * this sentence consists
933         * @throws edu.upenn.cis.spinal.SkippedSentenceException if the current sentence 
934         * is a skipped sentence in the LTAG-spinal treebank
935         */
936        public List getElemTrees(int from, int to) {
937            if (this.isSkipped()) {
938                throw new SkippedSentenceException(this);
939            }
940    
941            return this.elemTrees.subList(from, to+1);
942            // the "+1" is because our spans are inclusive but subList is exclusive
943        }
944        
945    
946    
947        
948    
949    
950        
951    }