How can I write a Java program that parses HTML tables and writes the result to a TXT file?
Here is what I have managed to write so far:
/**
 * Renders the table rows of a fixed HTML file as a text grid and prints the grid line by line.
 *
 * <p>The file is parsed with Jsoup and serialized again with {@code doc.html()}; everything after
 * that works on the serialized text. The grid width is derived from the first
 * {@code <tr>...</tr>} pair only, so a later row holding more cells than the first one is rejected
 * with an {@link IllegalStateException}.
 *
 * <p>NOTE(review): the line scan assumes that {@code doc.html()} puts every cell and every
 * {@code </tr>} on its own line, and that a cell carries at most one quoted attribute
 * ({@code colspan} or {@code rowspan}) - confirm against the real input.
 */
public class HTMLParser {

    /** Location of the HTML document to read. */
    private static final String INPUT_PATH = "d:/1/example.html";

    /**
     * Reads the input document, builds the text grid and prints one line per grid row.
     *
     * <p>Returns without printing anything when the document contains no plain
     * {@code <tr>...</tr>} pair.
     *
     * @param args unused
     * @throws IOException if the input file cannot be read
     * @throws IllegalStateException if a row holds more cells than the first row
     */
    public static void main(String[] args) throws IOException {
        File input = new File(INPUT_PATH);
        org.jsoup.nodes.Document doc = Jsoup.parse(input, "UTF-8");
        String html = doc.html();

        int firstRowStart = html.indexOf("<tr>");
        int firstRowEnd = html.indexOf("</tr>");
        if (firstRowStart < 0 || firstRowEnd < firstRowStart) {
            // No complete row to measure: substring() would throw, so there is nothing to render.
            return;
        }

        int columnCount = countColumns(html.substring(firstRowStart, firstRowEnd));
        int rowCount = countGridRows(html);

        String[][] table = new String[rowCount][columnCount];
        for (String[] row : table) {
            // First and last column form the left and right border.
            row[0] = "*";
            row[columnCount - 1] = "*";
        }

        fillTable(table, html.split("\n"));

        // NOTE(review): cells that were never filled are still null and are printed as the text
        // "null"; the output also goes to ConsoleHelper, not to a TXT file.
        for (String[] row : table) {
            StringBuilder text = new StringBuilder();
            for (String cell : row) {
                text.append(cell);
            }
            ConsoleHelper.println(text.toString());
        }
    }

    /** Returns the grid width: two border columns plus one column per cell of the first row, with colspan cells counted at their full span. */
    private static int countColumns(String firstRow) {
        int columns = 2;
        for (String fragment : firstRow.split(">*<")) {
            if (fragment.contains("/td>")) {
                columns++;
            }
            if (fragment.contains("colspan")) {
                // The closing tag already counted one column for this cell.
                columns += quotedInt(fragment) - 1;
            }
        }
        return columns;
    }

    /** Returns the grid height: twice the number of {@code <tr>}-separated pieces that contain {@code </tr>}, plus one. */
    private static int countGridRows(String html) {
        int rows = 0;
        for (String piece : html.split("<tr>")) {
            if (piece.contains("</tr>")) {
                rows++;
            }
        }
        // Presumably the extra rows are meant for separator lines; only rows 1..n are filled here.
        return rows * 2 + 1;
    }

    /** Copies cell texts from the serialized lines into the grid, starting at row 1, column 1. */
    private static void fillTable(String[][] table, String[] lines) {
        int y = 1;
        int x = 1;
        for (String line : lines) {
            if (line.contains("<td>")) {
                int col = firstFreeColumn(table[y], x);
                table[y][col] = line.substring(line.indexOf('>') + 1, line.lastIndexOf("</td>"));
                x = col + 1;
            } else if (line.contains("colspan")) {
                int colspan = quotedInt(line);
                int col = firstFreeColumn(table[y], x);
                table[y][col] = line.substring(line.indexOf('>') + 1, line.lastIndexOf('<'));
                x = col + colspan;
            } else if (line.contains("</tr>")) {
                x = 1;
                y++;
            } else if (line.contains("rowspan")) {
                int rowspan = quotedInt(line);
                int col = firstFreeColumn(table[y], x);
                table[y][col] = line.substring(line.indexOf('>') + 1, line.lastIndexOf('<'));
                // Reserve the same column in the following rows so later cells skip over it.
                for (int j = 1; j != rowspan; j++) {
                    table[y + j][col] = " ";
                }
                x = col + 1;
            }
        }
    }

    /** Returns the index of the first empty cell of {@code row} at or after {@code from}. */
    private static int firstFreeColumn(String[] row, int from) {
        for (int col = from; col < row.length; col++) {
            if (row[col] == null) {
                return col;
            }
        }
        throw new IllegalStateException(
                "Table row holds more cells than the " + (row.length - 2)
                        + " column(s) derived from the first row");
    }

    /** Parses the text between the first and the last double quote of {@code text} as an int. */
    private static int quotedInt(String text) {
        return Integer.parseInt(text.substring(text.indexOf('"') + 1, text.lastIndexOf('"')));
    }
}