Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug and Vulnerability fixes #834

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,9 @@ public Site setUserAgent(String userAgent) {
/**
* get cookies
*
* @return get cookies
* @return get defaultCookies
*/
public Map<String, String> getCookies() {
public Map<String, String> getDefaultCookies() {
return defaultCookies;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,15 @@ private SSLContext createIgnoreVerifySSL() throws NoSuchAlgorithmException, KeyM
// 实现一个X509TrustManager接口,用于绕过验证,不用修改里面的方法
X509TrustManager trustManager = new X509TrustManager() {

@Override
public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {

public void checkClientTrusted(X509Certificate[] chain, String authType) throws IllegalArgumentException {
}

@Override
public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {

public void checkServerTrusted(X509Certificate[] chain, String authType) throws IllegalArgumentException {
}

@Override

public X509Certificate[] getAcceptedIssuers() {
return null;
}
Expand Down Expand Up @@ -135,7 +135,7 @@ private void generateCookie(HttpClientBuilder httpClientBuilder, Site site) {
return;
}
CookieStore cookieStore = new BasicCookieStore();
for (Map.Entry<String, String> cookieEntry : site.getCookies().entrySet()) {
for (Map.Entry<String, String> cookieEntry : site.getDefaultCookies().entrySet()) {
BasicClientCookie cookie = new BasicClientCookie(cookieEntry.getKey(), cookieEntry.getValue());
cookie.setDomain(site.getDomain());
cookieStore.addCookie(cookie);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
<!DOCTYPE html>
<html>
<body>
<title>"Package"</title>
Downloader is the part that downloads web pages and stores each one in a Page object.
</body>
</html>
11 changes: 6 additions & 5 deletions webmagic-core/src/main/java/us/codecraft/webmagic/package.html
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
<html>
<body>
<div class="en">
Main class "Spider" and models.
</div>
</body>
<body>
<title>"Package"</title>
<div class="en">
Main class "Spider" and models.
</div>
</body>
</html>
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ public FilePipeline(String path) {

@Override
public void process(ResultItems resultItems, Task task) {
String path = this.path + PATH_SEPERATOR + task.getUUID() + PATH_SEPERATOR;
String path = this.path + PATH_SEPARATOR + task.getUUID() + PATH_SEPARATOR;
try {
PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(new FileOutputStream(getFile(path + DigestUtils.md5Hex(resultItems.getRequest().getUrl()) + ".html")),"UTF-8"));
printWriter.println("url:\t" + resultItems.getRequest().getUrl());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
<html>
<body>
<title>"Package"</title>
Pipeline is the part of the crawler that handles persistence and offline processing.
</body>
</html>
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
<!DOCTYPE html>
<html>
<title>"Package"</title>
<body>
PageProcessor is the custom part of a crawler for a specific site.
</body>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
<html>
<body>
<title>"Package"</title>
Scheduler is the part that handles URL management.
</body>
</html>
Original file line number Diff line number Diff line change
Expand Up @@ -12,29 +12,22 @@ public class FilePersistentBase {

protected String path;

public static String PATH_SEPERATOR = "/";

static {
String property = System.getProperties().getProperty("file.separator");
if (property != null) {
PATH_SEPERATOR = property;
}
}
protected static final String PATH_SEPARATOR = System.getProperties().getProperty("file.separator");

public void setPath(String path) {
if (!path.endsWith(PATH_SEPERATOR)) {
path += PATH_SEPERATOR;
if (!path.endsWith(PATH_SEPARATOR)) {
path += PATH_SEPARATOR;
}
this.path = path;
}

public File getFile(String fullName) {
protected File getFile(String fullName) {
checkAndMakeParentDirecotry(fullName);
return new File(fullName);
}

public void checkAndMakeParentDirecotry(String fullName) {
int index = fullName.lastIndexOf(PATH_SEPERATOR);
private void checkAndMakeParentDirecotry(String fullName) {
int index = fullName.lastIndexOf(PATH_SEPARATOR);
if (index > 0) {
String path = fullName.substring(0, index);
File file = new File(path);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ public T format(String raw) throws Exception {

protected abstract T formatTrimmed(String raw) throws Exception;

public static final List<Class<? extends ObjectFormatter>> basicTypeFormatters = Arrays.<Class<? extends ObjectFormatter>>asList(IntegerFormatter.class,
protected static final List<Class<? extends ObjectFormatter>> basicTypeFormatters = Arrays.<Class<? extends ObjectFormatter>>asList(IntegerFormatter.class,
LongFormatter.class, DoubleFormatter.class, FloatFormatter.class, ShortFormatter.class,
CharactorFormatter.class, ByteFormatter.class, BooleanFormatter.class);

public static Class<?> detectBasicClass(Class<?> type) {
protected static Class<?> detectBasicClass(Class<?> type) {
if (type.equals(Integer.TYPE) || type.equals(Integer.class)) {
return Integer.class;
} else if (type.equals(Long.TYPE) || type.equals(Long.class)) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
<!DOCTYPE html>
<html>
<title>"Package"</title>
<body>
Page model and annotations used to customize a crawler.
</body>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public FilePageModelPipeline(String path) {

@Override
public void process(Object o, Task task) {
String path = this.path + PATH_SEPERATOR + task.getUUID() + PATH_SEPERATOR;
String path = this.path + PATH_SEPARATOR + task.getUUID() + PATH_SEPARATOR;
try {
String filename;
if (o instanceof HasKey) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public JsonFilePageModelPipeline(String path) {

@Override
public void process(Object o, Task task) {
String path = this.path + PATH_SEPERATOR + task.getUUID() + PATH_SEPERATOR;
String path = this.path + PATH_SEPARATOR + task.getUUID() + PATH_SEPARATOR;
try {
String filename;
if (o instanceof HasKey) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public JsonFilePipeline(String path) {

@Override
public void process(ResultItems resultItems, Task task) {
String path = this.path + PATH_SEPERATOR + task.getUUID() + PATH_SEPERATOR;
String path = this.path + PATH_SEPARATOR + task.getUUID() + PATH_SEPARATOR;
try {
PrintWriter printWriter = new PrintWriter(new FileWriter(getFile(path + DigestUtils.md5Hex(resultItems.getRequest().getUrl()) + ".json")));
printWriter.write(JSON.toJSONString(resultItems.getAll()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,14 @@ public static String getFirstNoLoopbackIPAddresses() throws SocketException {
Enumeration<InetAddress> inetAddresses = networkInterface.getInetAddresses();
while (inetAddresses.hasMoreElements()) {
InetAddress address = inetAddresses.nextElement();
if (!address.isLoopbackAddress() && !Inet6Address.class.isInstance(address)) {
return address.getHostAddress();
} else if (!address.isLoopbackAddress()) {
localAddress = address;
if (address != null) {
if (!address.isLoopbackAddress() && !Inet6Address.class.isInstance(address)) {
return address.getHostAddress();
} else if (!address.isLoopbackAddress()) {
localAddress = address;
}
}else{
throw new NullPointerException("Address is null");
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
Expand Down Expand Up @@ -126,7 +127,9 @@ public String select(String text) {
return item.getTextContent();
} else {
StreamResult xmlOutput = new StreamResult(new StringWriter());
Transformer transformer = TransformerFactory.newInstance().newTransformer();
TransformerFactory tff = TransformerFactory.newInstance();
tff.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
Transformer transformer = tff.newTransformer();
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
transformer.transform(new DOMSource(item), xmlOutput);
return xmlOutput.getWriter().toString();
Expand Down Expand Up @@ -154,7 +157,9 @@ public List<String> selectList(String text) {
}
if (result instanceof NodeList) {
NodeList nodeList = (NodeList) result;
Transformer transformer = TransformerFactory.newInstance().newTransformer();
TransformerFactory tff = TransformerFactory.newInstance();
tff.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
Transformer transformer = tff.newTransformer();
StreamResult xmlOutput = new StreamResult();
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
for (int i = 0; i < nodeList.getLength(); i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ public Page download(Request request, Task task) {
}
WebDriver.Options manage = webDriver.manage();
Site site = task.getSite();
if (site.getCookies() != null) {
for (Map.Entry<String, String> cookieEntry : site.getCookies()
if (site.getDefaultCookies() != null) {
for (Map.Entry<String, String> cookieEntry : site.getDefaultCookies()
.entrySet()) {
Cookie cookie = new Cookie(cookieEntry.getKey(),
cookieEntry.getValue());
Expand Down