Skip to content
This repository has been archived by the owner on Jun 10, 2024. It is now read-only.

bugfix: connect to the redis message queue when the password contains URL-encoded chars like '#' #881

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
Open
9 changes: 6 additions & 3 deletions pyspider/message_queue/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
except ImportError:
import urlparse

import six.moves.urllib_parse


def connect_message_queue(name, url=None, maxsize=0, lazy_limit=True):
"""
Expand Down Expand Up @@ -61,9 +63,10 @@ def connect_message_queue(name, url=None, maxsize=0, lazy_limit=True):
except:
logging.warning('redis DB must zero-based numeric index, using 0 instead')
db = 0

password = parsed.password or None

if parsed.password:
password = six.moves.urllib_parse.unquote(parsed.password)
else:
password = None
return Queue(name=name, host=parsed.hostname, port=parsed.port, db=db, maxsize=maxsize, password=password, lazy_limit=lazy_limit)
elif url.startswith('kombu+'):
url = url[len('kombu+'):]
Expand Down
5 changes: 4 additions & 1 deletion pyspider/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,22 +525,25 @@ def all(ctx, fetcher_num, processor_num, result_worker_num, run_in):

try:
# phantomjs
"""
if not g.get('phantomjs_proxy'):
phantomjs_config = g.config.get('phantomjs', {})
phantomjs_config.setdefault('auto_restart', True)
threads.append(run_in(ctx.invoke, phantomjs, **phantomjs_config))
time.sleep(2)
if threads[-1].is_alive() and not g.get('phantomjs_proxy'):
g['phantomjs_proxy'] = '127.0.0.1:%s' % phantomjs_config.get('port', 25555)

"""
# puppeteer
"""
if not g.get('puppeteer_proxy'):
puppeteer_config = g.config.get('puppeteer', {})
puppeteer_config.setdefault('auto_restart', True)
threads.append(run_in(ctx.invoke, puppeteer, **puppeteer_config))
time.sleep(2)
if threads[-1].is_alive() and not g.get('puppeteer_proxy'):
g['puppeteer_proxy'] = '127.0.0.1:%s' % puppeteer_config.get('port', 22222)
"""

# result worker
result_worker_config = g.config.get('result_worker', {})
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Jinja2==2.7
chardet==3.0.4
cssselect==0.9
lxml==4.3.3
pycurl==7.43.0.3
pycurl==7.43.0.1
pyquery==1.4.0
requests==2.24.0
tornado==4.5.3
Expand Down
12 changes: 8 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,20 @@
'chardet==3.0.4',
'cssselect==0.9',
"lxml==4.3.3",
'pycurl==7.43.0.3',
'requests==2.24.0',
'pycurl==7.43.0.1',
'requests<=2.27.1',
'Flask-Login==0.2.11',
'u-msgpack-python==1.6',
'click==3.3',
'click>=3.3,<=6.7',
'certifi<=2020.4.5',
'six==1.10.0',
'tblib==1.4.0',
'wsgidav==2.3.0',
'tornado>=3.2,<=4.5.3',
'pyquery',
'MarkupSafe<=1.1.1',
'itsdangerous<=1.1.0',
'Werkzeug<=0.16.1',
'pyquery<=1.4.1',
]

extras_require_all = [
Expand Down