Здравствуйте.
Решил пойти сложным путем - написал собственный парсер без дополнительных библиотек. Даже разработал класс Tag под эту задачу.
Все работает, условие выполняет в точности.
Пробовал парсить случайную страницу с JavaRush - опять же, работает.
Что опять не так?
package com.javarush.task.task19.task1918;
/*
Знакомство с тегами
*/
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
public class Solution {
public static void main(String[] args) throws IOException {
BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
String fileName = br.readLine();
br.close();
if (args.length > 0) {
String query = args[0];
br = new BufferedReader(new FileReader(fileName));
ArrayList<String> data = new ArrayList<>(); //file
while (br.ready()) { data.add(br.readLine()); }//read file
br.close();//close reader
List<Tag> allTags = Tag.parseTags(data, query);//parse tags
LinkedList<String> tagStrings = new LinkedList<>();
for(int i=0;i<allTags.size();i++)//searching for pairs, filling tagStrings
{
Tag startTag = allTags.get(i);
Tag endTag = null;
if(startTag.getType() == Tag.Type.OPEN) {//a next open tag is found, searching for pair
for(int j = i+1,nested = 0;j<allTags.size();j++)//searching for pair in allTags, starting from next
{
Tag nextTag = allTags.get(j);
if(nextTag.getType() == Tag.Type.OPEN) { nested++; continue; }//found another open tag, go further
if(nested == 0) { endTag = nextTag; break; }//found a pair, exit cycle (j)
nested--;//nested close tag, continue
}
if(endTag == null) continue;//no close tag, ignore
tagStrings.add(getTagString(data,startTag,endTag));
}
}
/*for(Tag tag : allTags)
{
System.out.println(tag.getTagName()
+ " " + tag.getType()
+ " " + tag.getStartPos()[0]
+ ":" + tag.getStartPos()[1]
+ " " + tag.getEndPos()[0]
+ ":" + tag.getEndPos()[1]);
}*/
for(String s:tagStrings)
{
System.out.println(s);
}
}
}
public static String getTagString(List<String> data,Tag openTag, Tag closeTag)
{
StringBuilder result = new StringBuilder();
int start[] = openTag.getStartPos();
int end[] = closeTag.getEndPos();
for(int i = start[0];i<=end[0];i++)
{
if(i==start[0]) {//first line of tag, begin with offset
if(i==end[0]) { result.append(data.get(i).substring(start[1],end[1]+1)); }//tag is a single line
else { result.append(data.get(i).substring(start[1])); }//end is on another line, append string from offset to it's end
}
else { //not a first line, begin with line start
if(i==end[0]) { result.append(data.get(i).substring(0, end[1]+1)); }//end of tag
else { result.append(data.get(i)); } //whole line is a part of tag
}
}
return result.toString();
}
public static class Tag {
public enum Type { OPEN, CLOSE };
private int[] startPos, endPos;
private Type type;
private String tagName;
private Tag() {}
private Tag(int[] startPos, int[] endPos, Type type, String tagName) {
this.startPos = startPos;
this.endPos = endPos;
this.type = type;
this.tagName = tagName;
}
public Type getType() { return this.type; }
public String getTagName() { return this.tagName; }
public int[] getStartPos() { return this.startPos; }
public int[] getEndPos() { return this.endPos; }
public static List<Tag> parseTags(String line, int lineNumber, String tagName) {
ArrayList<Tag> result = new ArrayList<>();
if(!line.contains(tagName)||!line.contains("<")||!line.contains(">")) return result;
int startPos, endPos, offset = 0; //current position in line
String next; Type type;
while(line.contains("<")&&line.contains(">")) {
startPos = line.indexOf("<");
endPos = line.indexOf(">");
next = line.substring(startPos+1,endPos);
line = line.substring(endPos+1);
if(isRightTag(next,tagName)) {
type = parseType(next);//try to find a slash symbol at the beginning of tag
result.add(new Tag(new int[]{lineNumber,startPos+offset},new int[]{lineNumber,endPos+offset},type,tagName));
}
offset += endPos + 1;//add number of deleted symbols from line
}
return result;
}
private static boolean isRightTag(String content, String tagName) {
if(!content.contains(tagName)) return false;
StringBuilder sb = new StringBuilder(content);
while(Character.isSpaceChar(sb.charAt(0))||sb.charAt(0) == '/') sb.deleteCharAt(0);
if(sb.toString().equals(tagName)) return true;
if(sb.toString().startsWith(tagName)) {
if(Character.isSpaceChar(sb.charAt(tagName.length()))) return true;
}
return false;
}
public static List<Tag> parseTags(List<String> data, String tagName)
{
ArrayList<Tag> result = new ArrayList<>();
int startLine, endLine, startPos, endPos, offset = 0; //current position
String currentLine, nextLine, subLine;
for(int i = 0;i < data.size(); i++, offset = 0)
{
currentLine = data.get(i);
while(currentLine.contains("<")) { // line has tag start
startPos = currentLine.indexOf("<");
if(currentLine.contains(">")) { // line contains < and >
endPos = currentLine.indexOf(">");
if (startPos < endPos) { // < is earlier than >, so we have a tag
subLine = currentLine.substring(startPos + 1,endPos);
if(isRightTag(subLine,tagName))//right tag, add this to result
{
Type type = parseType(subLine);
int tagStart[] = {i,startPos + offset};
int tagEnd[] = {i,endPos + offset};
Tag tag = new Tag(tagStart,tagEnd,type,tagName);
result.add(tag);
}//if (not) the right tag, remove this and continue
}
offset += endPos + 1;
currentLine = currentLine.substring(endPos + 1);//remove current tag from line even if it's just > symbol
}
else {//line has <, but doesn't have >
currentLine = currentLine.substring(startPos + 1);//remove line start with < symbol
offset += startPos;
int tagStart[] = new int[]{i, startPos + offset};
int tagEnd[] = null;
Type type = null;
if(isRightTag(currentLine,tagName)) { type = parseType(currentLine); }
else { for(int j = i+1;j<data.size();j++) {//searching for tagname
nextLine = data.get(j);
if(isRightTag(nextLine,tagName)) { type = parseType(nextLine); }
}
}
if(type!=null) {
for (int j = i + 1; j < data.size(); j++) {//searching for end of tag
nextLine = data.get(j);
if (nextLine.contains(">")) {
tagEnd = new int[]{j, nextLine.indexOf(">")};
break;
}
}
if (tagEnd != null) { result.add(new Tag(tagStart, tagEnd, type, tagName)); }
}
}
}
}
return result;
}
/*public static List<Tag> parseTags(List<String> data, String tagName) {
ArrayList<Tag> result = new ArrayList<>();
for(int i=0;i<data.size();i++) {
List<Tag> temp = parseTags(data.get(i),i,tagName);
if(temp!=null) result.addAll(temp);
}
return result;
}*/
private static Type parseType(String s)
{
StringBuilder sb = new StringBuilder(s);
char slash = '/';
for(int i=0;i<sb.length();i++)
{
if(Character.isSpaceChar(sb.charAt(0))) { sb.deleteCharAt(0); i--;}
else if(sb.charAt(0) == slash) return Type.CLOSE;
else return Type.OPEN;
}
return Type.OPEN;
}
}
}
