# Tested in Databricks, where the `spark` session used below is predefined by the notebook.
import pyspark.sql.functions as F
import requests
# Build a one-column DataFrame ("pokenum") holding the integers 0..99,
# one integer per row.
cols = ["pokenum"]
# zip() over a single iterable yields 1-tuples: (0,), (1,), ..., (99,)
pokenumbers = list(zip(range(100)))
df_pokenums = spark.createDataFrame(
    data=pokenumbers,
    schema=cols,
)
# call API
def get_name(rows):
    """Look up the Pokemon name for the first row of a partition.

    Only ``rows[0]`` is queried; the remaining rows of the partition are
    ignored (the endpoint is called one id at a time, not in batches).

    Args:
        rows: Sequence of row objects exposing a ``pokenum`` attribute,
            e.g. one partition's list as produced by ``rdd.glom()``.

    Returns:
        A ``(status_code, name)`` tuple. ``status_code`` is the HTTP status
        of the response, or ``None`` when no response was obtained (empty
        partition, connection error, timeout). ``name`` is the value at
        ``['pokemon']['name']`` in the JSON body, or ``'did not work'`` when
        the request or the parsing failed.
    """
    # An empty partition has no row to look up; report it as a failed lookup
    # instead of raising IndexError inside the Spark task.
    if not rows:
        return None, 'did not work'

    first = rows[0]
    url = f'https://pokeapi.co/api/v2/pokemon-form/{first.pokenum}'

    # Stays None unless a response actually arrives, so the return statement
    # below never touches an unbound variable when the request itself fails.
    status_code = None
    try:
        # Bounded wait so a stalled connection cannot hang the task forever.
        resp = requests.get(url, timeout=10)
        status_code = resp.status_code
        name = resp.json()['pokemon']['name']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # RequestException: network failure / timeout.
        # ValueError: body is not valid JSON (e.g. a plain-text error page).
        # KeyError / TypeError: JSON does not have the expected structure.
        name = 'did not work'
    return status_code, name
# Apply get_name once per partition and bring the results back to the driver.
(
    df_pokenums
    .repartition(10)   # redistribute the rows into 10 partitions
    .rdd               # switch from the DataFrame to its underlying RDD of rows
    .glom()            # turn each partition into a single list of its rows
    .map(get_name)     # one get_name call per partition list
    .collect()         # gather the per-partition results
)