2020. 11. 9. 14:42
pandas를 활용하여 읽어온 엑셀(xlsx) 파일에서 특정 column에 대한 중복값을 제거하기 위하여
아래와 같이 먼저 파일을 엑셀에서 읽어온 후
import pandas as pd
# Load the Excel file into a DataFrame.
data = pd.read_excel("./42_nia_pqa_QueryTemplate.xlsx")
# Replace every missing value with 0.0 so that empty cells can be matched
# with a plain comparison in the next step.
data_fillna = data.fillna(0.0)
# Keep only the rows whose 'property' value is not the 0.0 placeholder.
# NOTE: if the column label 'property' is duplicated, data_fillna['property']
# is a DataFrame rather than a Series, the comparison yields a boolean
# DataFrame, and this line raises
# "ValueError: cannot reindex from a duplicate axis" (see the traceback below).
data_fillna_dropna = data_fillna[data_fillna['property'] != 0.0]
# Preview the first rows of the filtered result.
data_fillna_dropna.head()
특정 값 이외의 값만 남기기 위하여 위와 같이 코드를 실행하였으나
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-25-95f7c3a3bb77> in <module>
----> 1 data_fillna_dropna = data_fillna[data_fillna['property'] != 0.0]
2 data_fillna_dropna.head()
~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in __getitem__(self, key)
2912 # Do we have a (boolean) DataFrame?
2913 if isinstance(key, DataFrame):
-> 2914 return self._getitem_frame(key)
2916 # Do we have a (boolean) 1d indexer?
~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in _getitem_frame(self, key)
3008 if key.values.size and not is_bool_dtype(key.values):
3009 raise ValueError('Must pass DataFrame with boolean values only')
-> 3010 return self.where(key)
3012 def query(self, expr, inplace=False, **kwargs):
~/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in where(self, cond, other, inplace, axis, level, errors, try_cast, raise_on_error)
8832 other = com.apply_if_callable(other, self)
8833 return self._where(cond, other, inplace, axis, level,
-> 8834 errors=errors, try_cast=try_cast)
8836 @Appender(_shared_docs['where'] % dict(_shared_doc_kwargs, cond="False",
~/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in _where(self, cond, other, inplace, axis, level, errors, try_cast)
8686 errors=errors,
8687 try_cast=try_cast, axis=block_axis,
-> 8688 transpose=self._AXIS_REVERSED)
8690 return self._constructor(new_data).__finalize__(self)
~/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in where(self, **kwargs)
506 def where(self, **kwargs):
--> 507 return self.apply('where', **kwargs)
509 def setitem(self, **kwargs):
~/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
391 axis = getattr(obj, '_info_axis_number', 0)
392 kwargs[k] = obj.reindex(b_items, axis=axis,
--> 393 copy=align_copy)
395 applied = getattr(b, f)(**kwargs)
~/anaconda3/lib/python3.7/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
195 @wraps(func)
196 def wrapper(*args, **kwargs):
--> 197 return func(*args, **kwargs)
199 if not PY2:
~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in reindex(self, *args, **kwargs)
3807 kwargs.pop('axis', None)
3808 kwargs.pop('labels', None)
-> 3809 return super(DataFrame, self).reindex(**kwargs)
3811 @Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs)
~/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs)
4354 # perform the reindex on the axes
4355 return self._reindex_axes(axes, level, limit, tolerance, method,
-> 4356 fill_value, copy).__finalize__(self)
4358 def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value,
~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
3734 if columns is not None:
3735 frame = frame._reindex_columns(columns, method, copy, level,
-> 3736 fill_value, limit, tolerance)
3738 index = axes['index']
~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in _reindex_columns(self, new_columns, method, copy, level, fill_value, limit, tolerance)
3759 return self._reindex_with_indexers({1: [new_columns, indexer]},
3760 copy=copy, fill_value=fill_value,
-> 3761 allow_dups=False)
3763 def _reindex_multi(self, axes, copy, fill_value):
~/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in _reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
4488 fill_value=fill_value,
4489 allow_dups=allow_dups,
-> 4490 copy=copy)
4492 if copy and new_data is self._data:
~/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy)
1222 # some axes don't allow reindexing with dups
1223 if not allow_dups:
-> 1224 self.axes[axis]._can_reindex(indexer)
1226 if axis >= self.ndim:
~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in _can_reindex(self, indexer)
3085 # trying to reindex on an axis with duplicates
3086 if not self.is_unique and len(indexer):
-> 3087 raise ValueError("cannot reindex from a duplicate axis")
3089 def reindex(self, target, method=None, level=None, limit=None,
ValueError: cannot reindex from a duplicate axis
위와 같은 오류가 발생하였습니다.
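데이터를 확인해보니 'property'라는 이름의 column이 중복되어 존재하였고, 이 중복된 column으로 인하여 발생하는 오류였습니다. column 이름이 중복되면 data_fillna['property'] 가 Series가 아닌 DataFrame을 반환하므로 비교 결과도 boolean DataFrame이 되고, 이를 인덱싱에 사용하면 내부적으로 where → reindex 과정을 거치면서 위 오류가 발생합니다.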
해결 방법
해결 방법은 중복된 두 column 중에 하나의 column 이름을 변경해주면 됩니다.
# Rename the duplicate on data_fillna, the frame that is actually being
# filtered. data_fillna_dropna cannot be used here: its assignment is the
# statement that raised the ValueError, so it was never created.
columns = list(data_fillna.columns)
# Position 3 is taken to be the unused duplicate 'property' column in this
# file -- verify the position against data_fillna.columns before running.
columns[3] = "property_not_use"
data_fillna.columns = columns
# With unique column labels, data_fillna['property'] is a Series again, so
# the boolean-mask filter that previously failed now works.
data_fillna_dropna = data_fillna[data_fillna['property'] != 0.0]
방법은 정말 다양하게 존재하지만, 저는 기존 columns를 불러와서 중복된 column 중 활용하지 않는 column만 이름을 변경해 보았습니다.
이렇게 변경하고 나면 정상적으로 동작하는 것을 확인할 수 있을 겁니다.
읽어주셔서 감사합니다.
[Python] PDFInfoNotInstalledError: Unable to get page count. Is poppler installed and in PATH? 해결방법 (0)
2020.12.01
[Python] Python을 활용하여 대량의 한글 파일(hwp) 썸네일 편하게 만들어보기! (2)
2020.11.04
[Python] PIL(Pillow) 라이브러리를 활용하여 이미지 resize 시 깨지지 않게 저장하는 방법! (0)
2020.11.04
[Python] str 형식의 list 문자열 list 형식으로 변환하는 방법 ( str list to list python ) (6)
2020.11.02
[Python] PDFInfoNotInstalledError: Unable to get page count. Is poppler installed and in PATH? 해결방법
2020.12.01