The timeframe parameter (every hourly and daily setting I tested) keeps breaking the code: Google returns a 429 even on the first call (so this is not a case of calling too often), in code that had been running for a long time without any issues. (I am using the most recent version of pytrends.)
I found that Google can track whether an IP address is scraping, so you should wait a random interval between automation steps, and I highly recommend using proxies. An IP can scrape data again without getting a 429 if it "rests" for more than an hour or two after downloading a batch of keywords.
So, has this issue been resolved? I tried the aforementioned method of adding the custom header into "dailydata.py", but I still encounter the 429 Error...
from pytrends.request import TrendReq
import json
import concurrent.futures
from pytrends import dailydata
import pandas as pd
import time
# Reproduction script: download daily Google Trends interest for one keyword
# and save the result to an Excel file.
# Disabled: a directly constructed TrendReq client is not used; the call below
# goes through pytrends.dailydata instead.
#pytrend = TrendReq(hl='en-US',tz=360)
# Positional arguments are word, start_year, start_mon, stop_year, stop_mon,
# i.e. 'Rice' from 2004-01 through 2022-09, restricted to geo 'US'.
# NOTE(review): the data appears to be fetched one month per request, so this
# range implies many requests -- presumably relevant to the 429; confirm.
df = dailydata.get_daily_data('Rice', 2004, 1, 2022, 9, geo = 'US')
# Write the result to a spreadsheet (relative path, so it lands in the
# current working directory).
df.to_excel('Rice_6000.xlsx')
print('Complete')
Rice:2004-01-01 2004-01-31
---------------------------------------------------------------------------
TooManyRequestsError Traceback (most recent call last)
<ipython-input-1-44c7758dfcd6> in <module>
---> 13 df = dailydata.get_daily_data('Rice', 2004, 1, 2022, 9, geo = 'US')
14 df.to_excel('Rice_6000.xlsx')
15 print('Complete')
~\Anaconda3\lib\site-packages\pytrends\dailydata.py in get_daily_data(word, start_year, start_mon, stop_year, stop_mon, geo, verbose, wait_time)
140 if verbose:
141 print(f'{word}:{timeframe}')
--> 142 results[current] = _fetch_data(pytrends, build_payload, timeframe)
143 current = last_date_of_month + timedelta(days=1)
144 sleep(wait_time) # don't go too fast or Google will send 429s
~\Anaconda3\lib\site-packages\pytrends\dailydata.py in _fetch_data(pytrends, build_payload, timeframe)
70 else:
71 fetched = True
---> 72 return pytrends.interest_over_time()
~\Anaconda3\lib\site-packages\pytrends\request.py in interest_over_time(self)
233 method=TrendReq.GET_METHOD,
234 trim_chars=5,
--> 235 params=over_time_payload,
236 )
~\Anaconda3\lib\site-packages\pytrends\dailydata.py in _get_data(self, url, method, trim_chars, **kwargs)
35 class CustomTrendReq(TrendReq):
36 def _get_data(self, url, method=TrendReq.GET_METHOD, trim_chars=0, **kwargs):
---> 37 return super()._get_data(url, method=TrendReq.GET_METHOD, trim_chars=trim_chars, headers=headers, **kwargs)
39 def get_last_date_of_month(year: int, month: int) -> date:
~\Anaconda3\lib\site-packages\pytrends\request.py in _get_data(self, url, method, trim_chars, **kwargs)
156 else:
157 if response.status_code == status_codes.codes.too_many_requests:
--> 158 raise exceptions.TooManyRequestsError.from_response(response)
159 raise exceptions.ResponseError.from_response(response)
TooManyRequestsError: The request failed: Google returned a response with code 429
Same here, the example from the documentation fails with:
TooManyRequestsError Traceback (most recent call last)
[~\AppData\Local\Temp/ipykernel_23216/3968946112.py](https://file+.vscode-resource.vscode-cdn.net/c%3A/dev/Python/Forecasting_PY/~/AppData/Local/Temp/ipykernel_23216/3968946112.py) in <module>
9 # Interest Over Time
---> 10 interest_over_time_df = pytrend.interest_over_time()
11 print(interest_over_time_df.head())
[c:\ProgramData\anaconda3\envs\ML\lib\site-packages\pytrends\request.py](file:///C:/ProgramData/anaconda3/envs/ML/lib/site-packages/pytrends/request.py) in interest_over_time(self)
231 # make the request and parse the returned json
--> 232 req_json = self._get_data(
233 url=TrendReq.INTEREST_OVER_TIME_URL,
234 method=TrendReq.GET_METHOD,
[c:\ProgramData\anaconda3\envs\ML\lib\site-packages\pytrends\request.py](file:///C:/ProgramData/anaconda3/envs/ML/lib/site-packages/pytrends/request.py) in _get_data(self, url, method, trim_chars, **kwargs)
157 else:
158 if response.status_code == status_codes.codes.too_many_requests:
--> 159 raise exceptions.TooManyRequestsError.from_response(response)
160 raise exceptions.ResponseError.from_response(response)
TooManyRequestsError: The request failed: Google returned a response with code 429