Skip to content
This repository has been archived by the owner on Jun 10, 2024. It is now read-only.

Commit

Permalink
fix typo last_modif{,i}ed
Browse files Browse the repository at this point in the history
  • Loading branch information
binux committed Apr 20, 2016
1 parent 39eecef commit c8d4558
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 7 deletions.
2 changes: 1 addition & 1 deletion docs/apis/self.crawl.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ class Handler(BaseHandler):
> `Handler.crawl_config` can be used with `proxy` to set a proxy for the whole project.
* `etag` - use the HTTP ETag mechanism to skip processing if the content of the page has not changed. _default: True_ <a name="etag" href="#etag">¶</a>
* `last_modifed` - use HTTP Last-Modified header mechanism to pass the process if the content of the page is not changed. _default: True_ <a name="last_modifed" href="#last_modifed">¶</a>
* `last_modified` - use HTTP Last-Modified header mechanism to pass the process if the content of the page is not changed. _default: True_ <a name="last_modified" href="#last_modified">¶</a>
* `fetch_type` - set to `js` to enable JavaScript fetcher. _default: None_ <a name="fetch_type" href="#fetch_type">¶</a>
* `js_script` - JavaScript to run before or after the page is loaded; it should be wrapped in a function like `function() { document.write("binux"); }`. <a name="js_script" href="#js_script">¶</a>

Expand Down
7 changes: 4 additions & 3 deletions pyspider/fetcher/tornado_fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,10 +253,11 @@ def pack_tornado_request_parameters(self, url, task):
if _t and 'If-None-Match' not in fetch['headers']:
fetch['headers']['If-None-Match'] = _t
# last modified
if task_fetch.get('last_modified', True):
if task_fetch.get('last_modified', task_fetch.get('last_modifed', True)):
last_modified = task_fetch.get('last_modified', task_fetch.get('last_modifed', True))
_t = None
if isinstance(task_fetch.get('last_modifed'), six.string_types):
_t = task_fetch.get('last_modifed')
if isinstance(last_modified, six.string_types):
_t = last_modified
elif track_ok:
_t = track_headers.get('last-modified')
if _t and 'If-Modified-Since' not in fetch['headers']:
Expand Down
3 changes: 2 additions & 1 deletion pyspider/libs/base_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ def _crawl(self, url, **kwargs):
'proxy',
'etag',
'last_modifed',
'last_modified',
'save',
'js_run_at',
'js_script',
Expand Down Expand Up @@ -332,7 +333,7 @@ def crawl(self, url, **kwargs):
cookies
proxy
etag
last_modifed
last_modified
auto_recrawl
fetch_type
Expand Down
4 changes: 2 additions & 2 deletions tests/test_fetcher_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,8 +276,8 @@ def test_a160_etag(self):
self.assertFalse(newtasks)
self.assertFalse(result)

def test_a170_last_modifed(self):
status, newtasks, result = self.crawl(self.httpbin+'/cache', last_modifed='0', callback=self.json)
def test_a170_last_modified(self):
status, newtasks, result = self.crawl(self.httpbin+'/cache', last_modified='0', callback=self.json)

self.assertStatusOk(status)
self.assertFalse(newtasks)
Expand Down

0 comments on commit c8d4558

Please sign in to comment.