Skip to main content

VertexAI [Anthropic, Gemini, Model Garden]

Open In Colab

🆕 vertex_ai_beta/ route

New vertex_ai_beta/ route. Adds support for system messages, tool_choice params, etc. by moving to httpx client (instead of vertex sdk). This implementation uses VertexAI's REST API.

from litellm import completion
import json

## GET CREDENTIALS
file_path = 'path/to/vertex_ai_service_account.json'

# Load the JSON file
with open(file_path, 'r') as file:
vertex_credentials = json.load(file)

# Convert to JSON string
vertex_credentials_json = json.dumps(vertex_credentials)

## COMPLETION CALL
response = completion(
model="vertex_ai_beta/gemini-pro",
messages=[{ "content": "Hello, how are you?","role": "user"}],
vertex_credentials=vertex_credentials_json
)

System Message

from litellm import completion
import json

## GET CREDENTIALS
file_path = 'path/to/vertex_ai_service_account.json'

# Load the JSON file
with open(file_path, 'r') as file:
vertex_credentials = json.load(file)

# Convert to JSON string
vertex_credentials_json = json.dumps(vertex_credentials)


response = completion(
model="vertex_ai_beta/gemini-pro",
messages=[{"content": "You are a good bot.","role": "system"}, {"content": "Hello, how are you?","role": "user"}],
vertex_credentials=vertex_credentials_json
)

Function Calling

Force Gemini to make tool calls with tool_choice="required".

from litellm import completion
import json

## GET CREDENTIALS
file_path = 'path/to/vertex_ai_service_account.json'

# Load the JSON file
with open(file_path, 'r') as file:
vertex_credentials = json.load(file)

# Convert to JSON string
vertex_credentials_json = json.dumps(vertex_credentials)


messages = [
{
"role": "system",
"content": "Your name is Litellm Bot, you are a helpful assistant",
},
# User asks for their name and weather in San Francisco
{
"role": "user",
"content": "Hello, what is your name and can you tell me the weather?",
},
]

tools = [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
}
},
"required": ["location"],
},
},
}
]

data = {
"model": "vertex_ai_beta/gemini-1.5-pro-preview-0514"),
"messages": messages,
"tools": tools,
"tool_choice": "required",
"vertex_credentials": vertex_credentials_json
}

## COMPLETION CALL
print(completion(**data))

JSON Schema

From v1.40.1+ LiteLLM supports sending response_schema as a param for Gemini-1.5-Pro on Vertex AI. For other models (e.g. gemini-1.5-flash or claude-3-5-sonnet), LiteLLM adds the schema to the message list with a user-controlled prompt.

Response Schema

from litellm import completion 
import json

## SETUP ENVIRONMENT
# !gcloud auth application-default login - run this to add vertex credentials to your env

messages = [
{
"role": "user",
"content": "List 5 popular cookie recipes."
}
]

response_schema = {
"type": "array",
"items": {
"type": "object",
"properties": {
"recipe_name": {
"type": "string",
},
},
"required": ["recipe_name"],
},
}


completion(
model="vertex_ai_beta/gemini-1.5-pro",
messages=messages,
response_format={"type": "json_object", "response_schema": response_schema} # 👈 KEY CHANGE
)

print(json.loads(completion.choices[0].message.content))

Validate Schema

To validate the response_schema, set enforce_validation: true.

from litellm import completion, JSONSchemaValidationError
try:
completion(
model="vertex_ai_beta/gemini-1.5-pro",
messages=messages,
response_format={
"type": "json_object",
"response_schema": response_schema,
"enforce_validation": true # 👈 KEY CHANGE
}
)
except JSONSchemaValidationError as e:
print("Raw Response: {}".format(e.raw_response))
raise e

LiteLLM will validate the response against the schema, and raise a JSONSchemaValidationError if the response does not match the schema.

JSONSchemaValidationError inherits from openai.APIError

Access the raw response with e.raw_response

Add to prompt yourself

from litellm import completion 

## GET CREDENTIALS
file_path = 'path/to/vertex_ai_service_account.json'

# Load the JSON file
with open(file_path, 'r') as file:
vertex_credentials = json.load(file)

# Convert to JSON string
vertex_credentials_json = json.dumps(vertex_credentials)

messages = [
{
"role": "user",
"content": """
List 5 popular cookie recipes.

Using this JSON schema:

Recipe = {"recipe_name": str}

Return a `list[Recipe]`
"""
}
]

completion(model="vertex_ai_beta/gemini-1.5-flash-preview-0514", messages=messages, response_format={ "type": "json_object" })

Grounding

Add Google Search Result grounding to vertex ai calls.

Relevant VertexAI Docs

See the grounding metadata with response_obj._hidden_params["vertex_ai_grounding_metadata"]

from litellm import completion 

## SETUP ENVIRONMENT
# !gcloud auth application-default login - run this to add vertex credentials to your env

tools = [{"googleSearchRetrieval": {}}] # 👈 ADD GOOGLE SEARCH

resp = litellm.completion(
model="vertex_ai_beta/gemini-1.0-pro-001",
messages=[{"role": "user", "content": "Who won the world cup?"}],
tools=tools,
)

print(resp)

Moving from Vertex AI SDK to LiteLLM (GROUNDING)

If this was your initial VertexAI Grounding code,

import vertexai 

vertexai.init(project=project_id, location="us-central1")

model = GenerativeModel("gemini-1.5-flash-001")

# Use Google Search for grounding
tool = Tool.from_google_search_retrieval(grounding.GoogleSearchRetrieval(disable_attributon=False))

prompt = "When is the next total solar eclipse in US?"
response = model.generate_content(
prompt,
tools=[tool],
generation_config=GenerationConfig(
temperature=0.0,
),
)

print(response)

then, this is what it looks like now

from litellm import completion 


# !gcloud auth application-default login - run this to add vertex credentials to your env

tools = [{"googleSearchRetrieval": {"disable_attributon": False}}] # 👈 ADD GOOGLE SEARCH

resp = litellm.completion(
model="vertex_ai_beta/gemini-1.0-pro-001",
messages=[{"role": "user", "content": "Who won the world cup?"}],
tools=tools,
vertex_project="project-id"
)

print(resp)

Context Caching

Use Vertex AI Context Caching

Relevant VertexAI Docs

  1. Add model to config.yaml
model_list:
# used for /chat/completions, /completions, /embeddings endpoints
- model_name: gemini-1.5-pro-001
litellm_params:
model: vertex_ai_beta/gemini-1.5-pro-001
vertex_project: "project-id"
vertex_location: "us-central1"
vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json

# used for the /cachedContent and vertexAI native endpoints
default_vertex_config:
vertex_project: "adroit-crow-413218"
vertex_location: "us-central1"
vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json

  1. Start Proxy
$ litellm --config /path/to/config.yaml
  1. Make Request! We make the request in two steps:
  • Create a cachedContents object
  • Use the cachedContents object in your /chat/completions

Create a cachedContents object

First, create a cachedContents object by calling the Vertex cachedContents endpoint. The LiteLLM proxy forwards the /cachedContents request to the VertexAI API.

import httpx

# Set Litellm proxy variables
LITELLM_BASE_URL = "http://0.0.0.0:4000"
LITELLM_PROXY_API_KEY = "sk-1234"

httpx_client = httpx.Client(timeout=30)

print("Creating cached content")
create_cache = httpx_client.post(
url=f"{LITELLM_BASE_URL}/vertex-ai/cachedContents",
headers={"Authorization": f"Bearer {LITELLM_PROXY_API_KEY}"},
json={
"model": "gemini-1.5-pro-001",
"contents": [
{
"role": "user",
"parts": [{
"text": "This is sample text to demonstrate explicit caching." * 4000
}]
}
],
}
)

print("Response from create_cache:", create_cache)
create_cache_response = create_cache.json()
print("JSON from create_cache:", create_cache_response)
cached_content_name = create_cache_response["name"]

Use the cachedContents object in your /chat/completions request to VertexAI

import openai

# Set Litellm proxy variables
LITELLM_BASE_URL = "http://0.0.0.0:4000"
LITELLM_PROXY_API_KEY = "sk-1234"

client = openai.OpenAI(api_key=LITELLM_PROXY_API_KEY, base_url=LITELLM_BASE_URL)

response = client.chat.completions.create(
model="gemini-1.5-pro-001",
max_tokens=8192,
messages=[
{
"role": "user",
"content": "What is the sample text about?",
},
],
temperature=0.7,
extra_body={"cached_content": cached_content_name}, # Use the cached content
)

print("Response from proxy:", response)

Pre-requisites

  • pip install google-cloud-aiplatform (pre-installed on proxy docker image)

  • Authentication:

    • run gcloud auth application-default login See Google Cloud Docs

    • Alternatively you can set GOOGLE_APPLICATION_CREDENTIALS

      Here's how: Jump to Code

      • Create a service account on GCP
      • Export the credentials as a json
      • load the json and json.dump the json as a string
      • store the json string in your environment as GOOGLE_APPLICATION_CREDENTIALS

Sample Usage

import litellm
litellm.vertex_project = "hardy-device-38811" # Your Project ID
litellm.vertex_location = "us-central1" # proj location

response = litellm.completion(model="gemini-pro", messages=[{"role": "user", "content": "write code for saying hi from LiteLLM"}])

Usage with LiteLLM Proxy Server

Here's how to use Vertex AI with the LiteLLM Proxy Server

  1. Modify the config.yaml

    Use this when you need to set a different location for each vertex model

    model_list:
    - model_name: gemini-vision
    litellm_params:
    model: vertex_ai/gemini-1.0-pro-vision-001
    vertex_project: "project-id"
    vertex_location: "us-central1"
    - model_name: gemini-vision
    litellm_params:
    model: vertex_ai/gemini-1.0-pro-vision-001
    vertex_project: "project-id2"
    vertex_location: "us-east"
  2. Start the proxy

    $ litellm --config /path/to/config.yaml
  3. Send Request to LiteLLM Proxy Server

    import openai
    client = openai.OpenAI(
    api_key="sk-1234", # pass litellm proxy key, if you're using virtual keys
    base_url="http://0.0.0.0:4000" # litellm-proxy-base url
    )

    response = client.chat.completions.create(
    model="team1-gemini-pro",
    messages = [
    {
    "role": "user",
    "content": "what llm are you"
    }
    ],
    )

    print(response)

Specifying Safety Settings

In certain use-cases you may need to make calls to the models and pass safety settigns different from the defaults. To do so, simple pass the safety_settings argument to completion or acompletion. For example:

response = completion(
model="vertex_ai/gemini-pro",
messages=[{"role": "user", "content": "write code for saying hi from LiteLLM"}]
safety_settings=[
{
"category": "HARM_CATEGORY_HARASSMENT",
"threshold": "BLOCK_NONE",
},
{
"category": "HARM_CATEGORY_HATE_SPEECH",
"threshold": "BLOCK_NONE",
},
{
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"threshold": "BLOCK_NONE",
},
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"threshold": "BLOCK_NONE",
},
]
)

Set Vertex Project & Vertex Location

All calls using Vertex AI require the following parameters:

  • Your Project ID
import os, litellm 

# set via env var
os.environ["VERTEXAI_PROJECT"] = "hardy-device-38811" # Your Project ID`

### OR ###

# set directly on module
litellm.vertex_project = "hardy-device-38811" # Your Project ID`
  • Your Project Location
import os, litellm 

# set via env var
os.environ["VERTEXAI_LOCATION"] = "us-central1 # Your Location

### OR ###

# set directly on module
litellm.vertex_location = "us-central1 # Your Location

Anthropic

Model NameFunction Call
claude-3-opus@20240229completion('vertex_ai/claude-3-opus@20240229', messages)
claude-3-5-sonnet@20240620completion('vertex_ai/claude-3-5-sonnet@20240620', messages)
claude-3-sonnet@20240229completion('vertex_ai/claude-3-sonnet@20240229', messages)
claude-3-haiku@20240307completion('vertex_ai/claude-3-haiku@20240307', messages)

Usage

from litellm import completion
import os

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ""

model = "claude-3-sonnet@20240229"

vertex_ai_project = "your-vertex-project" # can also set this as os.environ["VERTEXAI_PROJECT"]
vertex_ai_location = "your-vertex-location" # can also set this as os.environ["VERTEXAI_LOCATION"]

response = completion(
model="vertex_ai/" + model,
messages=[{"role": "user", "content": "hi"}],
temperature=0.7,
vertex_ai_project=vertex_ai_project,
vertex_ai_location=vertex_ai_location,
)
print("\nModel Response", response)

Llama 3 API

Model NameFunction Call
meta/llama3-405b-instruct-maascompletion('vertex_ai/meta/llama3-405b-instruct-maas', messages)

Usage

from litellm import completion
import os

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ""

model = "meta/llama3-405b-instruct-maas"

vertex_ai_project = "your-vertex-project" # can also set this as os.environ["VERTEXAI_PROJECT"]
vertex_ai_location = "your-vertex-location" # can also set this as os.environ["VERTEXAI_LOCATION"]

response = completion(
model="vertex_ai/" + model,
messages=[{"role": "user", "content": "hi"}],
vertex_ai_project=vertex_ai_project,
vertex_ai_location=vertex_ai_location,
)
print("\nModel Response", response)

Mistral API

Supported OpenAI Params

Model NameFunction Call
mistral-large@latestcompletion('vertex_ai/mistral-large@latest', messages)
mistral-large@2407completion('vertex_ai/mistral-large@2407', messages)
mistral-nemo@latestcompletion('vertex_ai/mistral-nemo@latest', messages)
codestral@latestcompletion('vertex_ai/codestral@latest', messages)
codestral@@2405completion('vertex_ai/codestral@2405', messages)

Usage

from litellm import completion
import os

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ""

model = "mistral-large@2407"

vertex_ai_project = "your-vertex-project" # can also set this as os.environ["VERTEXAI_PROJECT"]
vertex_ai_location = "your-vertex-location" # can also set this as os.environ["VERTEXAI_LOCATION"]

response = completion(
model="vertex_ai/" + model,
messages=[{"role": "user", "content": "hi"}],
vertex_ai_project=vertex_ai_project,
vertex_ai_location=vertex_ai_location,
)
print("\nModel Response", response)

Usage - Codestral FIM

Call Codestral on VertexAI via the OpenAI /v1/completion endpoint for FIM tasks.

Note: You can also call Codestral via /chat/completion.

from litellm import completion
import os

# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ""
# OR run `!gcloud auth print-access-token` in your terminal

model = "codestral@2405"

vertex_ai_project = "your-vertex-project" # can also set this as os.environ["VERTEXAI_PROJECT"]
vertex_ai_location = "your-vertex-location" # can also set this as os.environ["VERTEXAI_LOCATION"]

response = text_completion(
model="vertex_ai/" + model,
vertex_ai_project=vertex_ai_project,
vertex_ai_location=vertex_ai_location,
prompt="def is_odd(n): \n return n % 2 == 1 \ndef test_is_odd():",
suffix="return True", # optional
temperature=0, # optional
top_p=1, # optional
max_tokens=10, # optional
min_tokens=10, # optional
seed=10, # optional
stop=["return"], # optional
)

print("\nModel Response", response)

Model Garden

Model NameFunction Call
llama2completion('vertex_ai/<endpoint_id>', messages)

Using Model Garden

from litellm import completion
import os

## set ENV variables
os.environ["VERTEXAI_PROJECT"] = "hardy-device-38811"
os.environ["VERTEXAI_LOCATION"] = "us-central1"

response = completion(
model="vertex_ai/<your-endpoint-id>",
messages=[{ "content": "Hello, how are you?","role": "user"}]
)

Gemini Pro

Model NameFunction Call
gemini-procompletion('gemini-pro', messages), completion('vertex_ai/gemini-pro', messages)

Gemini Pro Vision

Model NameFunction Call
gemini-pro-visioncompletion('gemini-pro-vision', messages), completion('vertex_ai/gemini-pro-vision', messages)

Gemini 1.5 Pro (and Vision)

Model NameFunction Call
gemini-1.5-procompletion('gemini-1.5-pro', messages), completion('vertex_ai/gemini-1.5-pro', messages)
gemini-1.5-flash-preview-0514completion('gemini-1.5-flash-preview-0514', messages), completion('vertex_ai/gemini-1.5-flash-preview-0514', messages)
gemini-1.5-pro-preview-0514completion('gemini-1.5-pro-preview-0514', messages), completion('vertex_ai/gemini-1.5-pro-preview-0514', messages)

Using Gemini Pro Vision

Call gemini-pro-vision in the same input/output format as OpenAI gpt-4-vision

LiteLLM Supports the following image types passed in url

Example Request - image url

import litellm

response = litellm.completion(
model = "vertex_ai/gemini-pro-vision",
messages=[
{
"role": "user",
"content": [
{
"type": "text",
"text": "Whats in this image?"
},
{
"type": "image_url",
"image_url": {
"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
}
}
]
}
],
)
print(response)

Usage - Function Calling

LiteLLM supports Function Calling for Vertex AI gemini models.

from litellm import completion
import os
# set env
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ".."
os.environ["VERTEX_AI_PROJECT"] = ".."
os.environ["VERTEX_AI_LOCATION"] = ".."

tools = [
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location"],
},
},
}
]
messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]

response = completion(
model="vertex_ai/gemini-pro-vision",
messages=messages,
tools=tools,
)
# Add any assertions, here to check response args
print(response)
assert isinstance(response.choices[0].message.tool_calls[0].function.name, str)
assert isinstance(
response.choices[0].message.tool_calls[0].function.arguments, str
)

Usage - PDF / Videos / etc. Files

Pass any file supported by Vertex AI, through LiteLLM.

Using gs://

from litellm import completion

response = completion(
model="vertex_ai/gemini-1.5-flash",
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
{
"type": "image_url",
"image_url": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf", # 👈 PDF
},
],
}
],
max_tokens=300,
)

print(response.choices[0])

using base64

from litellm import completion
import base64
import requests

# URL of the file
url = "https://storage.googleapis.com/cloud-samples-data/generative-ai/pdf/2403.05530.pdf"

# Download the file
response = requests.get(url)
file_data = response.content

encoded_file = base64.b64encode(file_data).decode("utf-8")

response = completion(
model="vertex_ai/gemini-1.5-flash",
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
{
"type": "image_url",
"image_url": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
},
],
}
],
max_tokens=300,
)

print(response.choices[0])

Chat Models

Model NameFunction Call
chat-bison-32kcompletion('chat-bison-32k', messages)
chat-bisoncompletion('chat-bison', messages)
chat-bison@001completion('chat-bison@001', messages)

Code Chat Models

Model NameFunction Call
codechat-bisoncompletion('codechat-bison', messages)
codechat-bison-32kcompletion('codechat-bison-32k', messages)
codechat-bison@001completion('codechat-bison@001', messages)

Text Models

Model NameFunction Call
text-bisoncompletion('text-bison', messages)
text-bison@001completion('text-bison@001', messages)

Code Text Models

Model NameFunction Call
code-bisoncompletion('code-bison', messages)
code-bison@001completion('code-bison@001', messages)
code-gecko@001completion('code-gecko@001', messages)
code-gecko@latestcompletion('code-gecko@latest', messages)

Embedding Models

Usage - Embedding

import litellm
from litellm import embedding
litellm.vertex_project = "hardy-device-38811" # Your Project ID
litellm.vertex_location = "us-central1" # proj location

response = embedding(
model="vertex_ai/textembedding-gecko",
input=["good morning from litellm"],
)
print(response)

Supported Embedding Models

All models listed here are supported

Model NameFunction Call
text-embedding-004embedding(model="vertex_ai/text-embedding-004", input)
text-multilingual-embedding-002embedding(model="vertex_ai/text-multilingual-embedding-002", input)
textembedding-geckoembedding(model="vertex_ai/textembedding-gecko", input)
textembedding-gecko-multilingualembedding(model="vertex_ai/textembedding-gecko-multilingual", input)
textembedding-gecko-multilingual@001embedding(model="vertex_ai/textembedding-gecko-multilingual@001", input)
textembedding-gecko@001embedding(model="vertex_ai/textembedding-gecko@001", input)
textembedding-gecko@003embedding(model="vertex_ai/textembedding-gecko@003", input)
text-embedding-preview-0409embedding(model="vertex_ai/text-embedding-preview-0409", input)
text-multilingual-embedding-preview-0409embedding(model="vertex_ai/text-multilingual-embedding-preview-0409", input)

Advanced Use task_type and title (Vertex Specific Params)

👉 task_type and title are vertex specific params

LiteLLM Supported Vertex Specific Params

auto_truncate: Optional[bool] = None
task_type: Optional[Literal["RETRIEVAL_QUERY","RETRIEVAL_DOCUMENT", "SEMANTIC_SIMILARITY", "CLASSIFICATION", "CLUSTERING", "QUESTION_ANSWERING", "FACT_VERIFICATION"]] = None
title: Optional[str] = None # The title of the document to be embedded. (only valid with task_type=RETRIEVAL_DOCUMENT).

Example Usage with LiteLLM

response = litellm.embedding(
model="vertex_ai/text-embedding-004",
input=["good morning from litellm", "gm"]
task_type = "RETRIEVAL_DOCUMENT",
dimensions=1,
auto_truncate=True,
)

Image Generation Models

Usage

response = await litellm.aimage_generation(
prompt="An olympic size swimming pool",
model="vertex_ai/imagegeneration@006",
vertex_ai_project="adroit-crow-413218",
vertex_ai_location="us-central1",
)

Generating multiple images

Use the n parameter to pass how many images you want generated

response = await litellm.aimage_generation(
prompt="An olympic size swimming pool",
model="vertex_ai/imagegeneration@006",
vertex_ai_project="adroit-crow-413218",
vertex_ai_location="us-central1",
n=1,
)

Extra

Using GOOGLE_APPLICATION_CREDENTIALS

Here's the code for storing your service account credentials as GOOGLE_APPLICATION_CREDENTIALS environment variable:

def load_vertex_ai_credentials():
# Define the path to the vertex_key.json file
print("loading vertex ai credentials")
filepath = os.path.dirname(os.path.abspath(__file__))
vertex_key_path = filepath + "/vertex_key.json"

# Read the existing content of the file or create an empty dictionary
try:
with open(vertex_key_path, "r") as file:
# Read the file content
print("Read vertexai file path")
content = file.read()

# If the file is empty or not valid JSON, create an empty dictionary
if not content or not content.strip():
service_account_key_data = {}
else:
# Attempt to load the existing JSON content
file.seek(0)
service_account_key_data = json.load(file)
except FileNotFoundError:
# If the file doesn't exist, create an empty dictionary
service_account_key_data = {}

# Create a temporary file
with tempfile.NamedTemporaryFile(mode="w+", delete=False) as temp_file:
# Write the updated content to the temporary file
json.dump(service_account_key_data, temp_file, indent=2)

# Export the temporary file as GOOGLE_APPLICATION_CREDENTIALS
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.path.abspath(temp_file.name)

Using GCP Service Account

info

Trying to deploy LiteLLM on Google Cloud Run? Tutorial here

  1. Figure out the Service Account bound to the Google Cloud Run service
  1. Get the FULL EMAIL address of the corresponding Service Account

  2. Next, go to IAM & Admin > Manage Resources , select your top-level project that houses your Google Cloud Run Service

Click Add Principal

  1. Specify the Service Account as the principal and Vertex AI User as the role

Once that's done, when you deploy the new container in the Google Cloud Run service, LiteLLM will have automatic access to all Vertex AI endpoints.

s/o @Darien Kindlund for this tutorial