How to call an API from PySpark (in workers)

Tested in Databricks, where the `spark` session used below is already defined in every notebook.

import pyspark.sql.functions as F
import requests

# Build a one-column DataFrame holding the numbers 0-99, one per row.
# NOTE: `spark` is not defined in this snippet; presumably it is the
# SparkSession supplied by the notebook environment.
cols = ["pokenum"]
pokenumbers = [(number,) for number in range(100)]  # one 1-tuple per row

df_pokenums = spark.createDataFrame(pokenumbers, schema=cols)

# call API
def get_name(rows):
    """Look up the Pokemon name for the first row of ``rows`` via HTTP.

    Only the first row is used because the API does not support batch
    requests.

    Args:
        rows: Indexable sequence of row objects exposing a ``pokenum``
            attribute.

    Returns:
        A ``(status_code, name)`` tuple. ``status_code`` is the HTTP status
        of the response, or ``None`` when no response was received at all.
        ``name`` is the Pokemon name, or ``'did not work'`` when the request
        failed or the response body did not have the expected shape.

    Raises:
        IndexError: If ``rows`` is empty.
    """
    # take the first item in list (API doesn't support batch)
    first = rows[0]
    # NOTE(review): the base URL was missing from the original snippet. The
    # PokeAPI "pokemon-form" resource is assumed because its JSON carries
    # the ['pokemon']['name'] field read below -- confirm before relying on it.
    url = f'https://pokeapi.co/api/v2/pokemon-form/{first.pokenum}'

    try:
        # A timeout keeps an unresponsive server from hanging the Spark task.
        resp = requests.get(url, timeout=10)
    except requests.RequestException:
        # No response object exists, so there is no status code to report.
        return None, 'did not work'

    try:
        name = resp.json()['pokemon']['name']
    except (ValueError, KeyError, TypeError):
        # Non-JSON body (e.g. a 404 page) or JSON without the expected keys.
        name = 'did not work'
    return resp.status_code, name

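# apply to partitions
# NOTE: the code for this step is missing from the snippet; get_name still
# has to be invoked on the rows of df_pokenums from the workers.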

Leave a Comment

This site uses Akismet to reduce spam. Learn how your comment data is processed.