You signed in with another tab or window.
Reload
to refresh your session.
You signed out in another tab or window.
Reload
to refresh your session.
You switched accounts on another tab or window.
Reload
to refresh your session.
By clicking “Sign up for GitHub”, you agree to our
terms of service
and
privacy statement
. We’ll occasionally send you account related emails.
Already on GitHub?
Sign in
to your account
上图是我的数据前一部分,我的目的是对 titles 一列进行分词,分词的代码如下。现在遇到的问题是
AttributeError: 'int' object has no attribute 'decode'
,所以我认为是 titles 中有 int 所致,所以添加了一个判断条件,但是代码执行的结果依旧是报之前的错。请问这是什么原因?
def jiebait(text):
    """Segment ``text`` with jieba in full mode and join the kept tokens.

    Args:
        text: The value to segment. ``str`` and ``bytes`` are handed to
            jieba unchanged; any other value (for example an ``int`` cell
            taken from a data column) is converted with ``str()`` first.
            ``None``, float NaN and empty input give an empty result.

    Returns:
        One string holding every token that is at least two characters
        long, separated by single spaces.
    """
    # Missing values carry no text to segment. NaN is the only float that
    # compares unequal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    # jieba.cut() calls .decode() on any input that is not already text, so
    # a non-string input such as an int raises
    # "AttributeError: 'int' object has no attribute 'decode'" before a
    # single token is produced. Checking each token afterwards cannot help;
    # the input itself has to be made a string.
    if not isinstance(text, (str, bytes)):
        text = str(text)
    if not text:
        return ''
    # For search-engine mode, use jieba.cut_for_search(text) instead.
    return ' '.join(
        word for word in jieba.cut(text, cut_all=True) if len(word) >= 2
    )
seglist = [str(w) for w in jieba.cut(text, cut_all = True)]
fenci = []
for word in seglist:
if (not isinstance(word, int)) and (len(word) >= 2):
fenci.append(word)
# 如用搜索引擎模式:
#seglist = jieba.cut_for_search(text)
return ' '.join(fenci)
AttributeError Traceback (most recent call last)
in ()
2 result_line = ""
3 # segment
----> 4 seg_list = [str(w) for w in jieba.cut(line, cut_all = False)]
5 # remove special character
6 temp = re.sub("[\s+.!_,$%^*(+"')]+|[+——()?【】“”!,。?、~@#¥%……&*()]+", "",("/".join(seg_list)))
in (.0)
2 result_line = ""
3 # segment
----> 4 seg_list = [str(w) for w in jieba.cut(line, cut_all = False)]
5 # remove special character
6 temp = re.sub("[\s+.!_,$%^*(+"')]+|[+——()?【】“”!,。?、~@#¥%……&*()]+", "",("/".join(seg_list)))
D:\Anaconda\lib\site-packages\jieba\__init__.py in cut(self, sentence, cut_all, HMM)
280 - HMM: Whether to use the Hidden Markov Model.
281 '''
--> 282 sentence = strdecode(sentence)
284 if cut_all:
D:\Anaconda\lib\site-packages\jieba\_compat.py in strdecode(sentence)
35 if not isinstance(sentence, text_type):
36 try:
---> 37 sentence = sentence.decode('utf-8')
38 except UnicodeDecodeError:
39 sentence = sentence.decode('gbk', 'ignore')
AttributeError: 'int' object has no attribute 'decode'