package webTools;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import dbTools.DBTools;
public class IOTOWeb {
public String getHtmlContent(String htmlURL) {
URL url = null;
String rowContent = "";
StringBuffer htmlContent = new StringBuffer();
try {
url = new URL(htmlURL);
BufferedReader in = new BufferedReader(new InputStreamReader(url
.openStream(), "gb2312"));
while ((rowContent = in.readLine()) != null) {
htmlContent.append(rowContent);
}
in.close();
} catch (MalformedURLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (UnsupportedEncodingException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return htmlContent.toString();
}
public List getLink(String htmlContent) {
ArrayList listLink = new ArrayList();
String regex = "
]*>[//(]*]*href=(/"([^/"]*)/"|/'([^/']*)/'|([^//s>]*))[^>]*>(.*?)[//)]*[//s]* | ";
Pattern pattern = Pattern.compile(regex, Pattern.DOTALL);
Matcher matcher = pattern.matcher(htmlContent);
while (matcher.find()) {
listLink.add(matcher.group());
}
return listLink;
}
public List
getHref(String htmlContent) {
String regex;
List listtHref = new ArrayList();
regex = "href=(/"([^/"]*)/"|/'([^/']*)/'|([^//s>]*))/"";
Pattern pa = Pattern.compile(regex, Pattern.DOTALL);
Matcher ma = pa.matcher(htmlContent);
while (ma.find()) {
listtHref.add(ma.group().replaceFirst("href=/"", "").replace("/"",
""));
}
return listtHref;
}
public List getPerson(String htmlContent) {
String regex;
List list = new ArrayList();
regex = "//(]*href=(/"([^/"]*)/"|/'([^/']*)/'|([^//s>]*))[^>]*>(.*?)//)";
Pattern pa = Pattern.compile(regex, Pattern.DOTALL);
Matcher ma = pa.matcher(htmlContent);
while (ma.find()) {
list.add(ma.group().replaceFirst("href=/"", "").replace("/"", ""));
}
return list;
}
public List getSongName(String htmlContent) {
String regex;
List listPerson = new ArrayList();
regex = "]*href=(/"([^/"]*)/"|/'([^/']*)/'|([^//s>]*))[^>]*>(.*?)//s";
Pattern pa = Pattern.compile(regex, Pattern.DOTALL);
Matcher ma = pa.matcher(htmlContent);
while (ma.find()) {
listPerson.add(ma.group());
}
return listPerson;
}
public String getMainContent(String htmlContent) {
String regex = "";
StringBuffer mainContent = new StringBuffer();
Pattern pattern = Pattern.compile(regex, Pattern.DOTALL);
Matcher matcher = pattern.matcher(htmlContent);
while (matcher.find()) {
mainContent.append(matcher.group());
}
return mainContent.toString();
}
public String outTag(final String s) {
return s.replaceAll("<.>", "");
}
DBTools dbTools = new DBTools();
public void getFromBaiduMap3(String htmlURL) throws Throwable {
HashMap htmlContentMap = new HashMap();
String htmlContent = getHtmlContent(htmlURL);
String mainContent = getMainContent(htmlContent);
List listLink = getLink(mainContent);
for (int j = 0; j String tdTag = listLink.get(j).toString();
List songNameList = getSongName(tdTag);
String songName = outTag(songNameList.get(0).toString());
List personList = getPerson(tdTag);
String songPerson = "";
if (personList.size() != 0) {
for (int n = 0; n // System.out.println(personList.get(n).toString());
songPerson = outTag(personList.get(n).toString());
}
} else {
songPerson = "无";
}
// System.out.print(songNameList.get(0).toString());
List hrefList = getHref(songNameList.get(0).toString());
String songHref = hrefList.get(0).toString();
System.out.println();
String sql = "insert into song(songName,songPerson,songHref) values(?,?,?)";
ArrayList list_values = new ArrayList();
list_values.add(songName);
list_values.add(songPerson);
list_values.add(songHref);
dbTools.update(sql, list_values);
}
}
}