// we are processing a comment. We are inside // the <!-- .... --> looking for the -->. } else if(mode == COMMENT) { if(c == '>' && sb.toString().endsWith("--")) { sb.setLength(0); mode = popMode(st); } else sb.append((char)c);
// We are outside the root tag element } else if(mode == PRE) { if(c == '<') { mode = TEXT; st.push(new Integer(mode)); mode = START_TAG; }
// We are inside one of these <? ... ?> // or one of these <!DOCTYPE ... > } else if(mode == DOCTYPE) { if(c == '>') { mode = popMode(st); if(mode == TEXT) mode = PRE; }
// we have just seen a < and // are wondering what we are looking at // <foo>, </foo>, <!-- ... --->, etc. } else if(mode == START_TAG) { mode = popMode(st); if(c == '/') { st.push(new Integer(mode)); mode = CLOSE_TAG; } else if (c == '?') { mode = DOCTYPE; } else { st.push(new Integer(mode)); mode = OPEN_TAG; tagName = null; attrs = new Hashtable(); sb.append((char)c); }
// we are processing an entity, e.g. <, », etc. } else if(mode == ENTITY) { if(c == ';') { mode = popMode(st); String cent = etag.toString(); etag.setLength(0); if(cent.equals("lt")) sb.append('<'); else if(cent.equals("gt")) sb.append('>'); else if(cent.equals("amp")) sb.append('&'); else if(cent.equals("quot")) sb.append('"'); else if(cent.equals("apos")) sb.append('/''); // Could parse hex entities if we wanted to //else if(cent.startsWith("#x")) //sb.append((char)Integer.parseInt(cent.substring(2),16)); else if(cent.startsWith("#")) sb.append((char)Integer.parseInt(cent.substring(1))); // Insert custom entity definitions here else exc("Unknown entity: &"+cent+";",line,col); } else { etag.append((char)c); }
// we have just seen something like this: // <foo a="b"/ // and are looking for the final >. } else if(mode == SINGLE_TAG) { if(tagName == null) tagName = sb.toString(); if(c != '>') exc("Expected > for tag: <"+tagName+"/>",line,col); doc.startElement(tagName,attrs); doc.endElement(tagName); if(depth==0) { doc.enddocument.); return; } sb.setLength(0); attrs = new Hashtable(); tagName = null; mode = popMode(st);