For research purpose, I need to build a set of benign programs. First, I need to get these programs from http://downloads.informer.com. To do so, I have written a python script that iterates each downloading page and extract the download links into a list. After that the script uses these links to download the programs (these programs are exe, msi, or zip files). Unfortunately, at this step, the script runs into error stating that (AttributeError: 'Request' object has no attribute 'decode').
Following is the script that works on single page and retrieves single program (for simplicity):
import wget
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
my_url = 'http://sweet-home-3d.informer.com/download'
import urllib.request
req = urllib.request.Request(
my_url,
data=None,
headers={
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
}
)
uClient = uReq(req)
page_html = uClient.read()
page_soup = soup(page_html, 'lxml' )
cont01 = page_soup.findAll('a', {'class':'download_button'})
conts = cont01[1]
ref= conts['href']
addr = urllib.request.Request(
ref,
data=None,
headers={
'User-Agent': 'Mozilla/5.0'
}
)
wget.download(addr)
The error I get is following:
AttributeError Traceback (most recent call last)
<ipython-input-1-93c4caaa1777> in <module>()
31 }
32 )
---> 33 wget.download(addr)
C:\Users\bander\Anaconda3\lib\site-packages\wget.py in download(url, out, bar)
503
504 # get filename for temp file in current directory
--> 505 prefix = detect_filename(url, out)
506 (fd, tmpfile) = tempfile.mkstemp(".tmp", prefix=prefix, dir=".")
507 os.close(fd)
C:\Users\bander\Anaconda3\lib\site-packages\wget.py in detect_filename(url, out, headers, default)
482 names["out"] = out or ''
483 if url:
--> 484 names["url"] = filename_from_url(url) or ''
485 if headers:
486 names["headers"] = filename_from_headers(headers) or ''
C:\Users\bander\Anaconda3\lib\site-packages\wget.py in filename_from_url(url)
228 """:return: detected filename as unicode or None"""
229 # [ ] test urlparse behavior with unicode url
--> 230 fname = os.path.basename(urlparse.urlparse(url).path)
231 if len(fname.strip(" \n\t.")) == 0:
232 return None
C:\Users\bander\Anaconda3\lib\urllib\parse.py in urlparse(url, scheme, allow_fragments)
292 Note that we don't break the components up in smaller bits
293 (e.g. netloc is a single string) and we don't expand % escapes."""
--> 294 url, scheme, _coerce_result = _coerce_args(url, scheme)
295 splitresult = urlsplit(url, scheme, allow_fragments)
296 scheme, netloc, url, query, fragment = splitresult
C:\Users\bander\Anaconda3\lib\urllib\parse.py in _coerce_args(*args)
112 if str_input:
113 return args + (_noop,)
--> 114 return _decode_args(args) + (_encode_result,)
115
116 # Result objects are more helpful than simple tuples
C:\Users\bander\Anaconda3\lib\urllib\parse.py in _decode_args(args, encoding, errors)
96 def _decode_args(args, encoding=_implicit_encoding,
97 errors=_implicit_errors):
---> 98 return tuple(x.decode(encoding, errors) if x else '' for x in args)
99
100 def _coerce_args(*args):
C:\Users\bander\Anaconda3\lib\urllib\parse.py in <genexpr>(.0)
96 def _decode_args(args, encoding=_implicit_encoding,
97 errors=_implicit_errors):
---> 98 return tuple(x.decode(encoding, errors) if x else '' for x in args)
99
100 def _coerce_args(*args):
AttributeError: 'Request' object has no attribute 'decode'
I would be grateful if someone could help me fix this. Thanks beforehand.