Hi everyone, I'm new to Python. I crawl data (strings that mix Chinese and English) and save it to python_job.csv. When I open that file in Excel, all of the Chinese text turns into gibberish. The problem only happens when the CSV is opened in Excel; if I open the same file in Notepad or as a .txt file, it displays normally. I tried .decode('unicode_escape') and .encode('unicode_escape'), which convert the Chinese into Unicode escape sequences, but I still cannot turn them back into normal readable text. Does anyone know how to fix this?
def get_link(self, response):
    """Parse one page of job-search JSON, keep Python jobs, then go to the next page.

    For every record in ``data['data']`` that has a text value containing
    "Python" or "python", an ``Article`` item is built, one row is appended
    to the module-level ``f_pos`` DataFrame, and the item is yielded.

    After the page is processed the module-level ``count`` is incremented.
    While ``count`` does not exceed the response's ``TOTALPAGE`` a
    ``Request`` for the next page is yielded with this method as callback;
    otherwise ``f_pos`` is written to ``python_job.csv``.

    Args:
        response: the response whose ``body`` holds the JSON payload.

    Yields:
        ``Article`` items for matching jobs, and at most one follow-up
        ``Request``.
    """
    # `col` (next row index in f_pos) and `count` (current page number) are
    # rebound below; `f_pos`, `title` and `get_par` are only read/mutated.
    global col, count

    data = json.loads(response.body)
    total_count = int(data['TOTALPAGE'])

    # type(u'') is the text type json.loads produces for strings
    # (`unicode` on Python 2, `str` on Python 3).
    text_type = type(u'')

    for detail in data['data']:
        # Substring test with `in`: str.index() raises when the word is
        # absent and returns a falsy 0 when it is at the very start, so it
        # cannot be used as a boolean. any() also guarantees a job is
        # emitted once even if several of its fields mention Python.
        mentions_python = any(
            isinstance(value, text_type)
            and (u'Python' in value or u'python' in value)
            for value in detail.values()
        )
        if not mentions_python:
            continue

        try:
            item = Article()
            item['name'] = detail['JOB']
            item['link'] = 'http://www.104.com.tw/jobbank/cust_job/job.cfm?j=' + detail['J']
            item['content'] = detail['DESCRIPTION']
            item['skill'] = detail['OTHERS']
        except (KeyError, TypeError) as e:
            # Best effort, as before: report a malformed record and move on
            # instead of aborting the whole page.
            print(e)
            continue

        # Known columns map to their own field; any other column name in
        # `title` receives the skill text, as in the original else-branch.
        row = {
            'name': item['name'],
            'link': item['link'],
            'content': item['content'],
        }
        for column in title:
            f_pos.loc[col, column] = row.get(column, item['skill'])
        col = col + 1

        yield item

    count = count + 1
    if count <= total_count:
        yield Request(
            url='http://www.104.com.tw/i/apis/jobsearch.cfm?kws=研發替代役&page='+ str(count) +'&'+ get_par,
            dont_filter=True,
            callback=self.get_link
        )
    else:
        # 'utf_8_sig' prepends a UTF-8 byte-order mark. Excel relies on that
        # BOM to recognise the file as UTF-8; without it Excel falls back to
        # a legacy code page and the Chinese text is shown as gibberish.
        f_pos.to_csv('python_job.csv', columns=title, encoding='utf_8_sig')