As VMware administrators, we often need to collect and analyze data from our vSphere with Tanzu environments. Let's explore how to monitor a vSphere with Tanzu environment using the modern vSphere REST API approaches.
When you log in manually, you can open the API Explorer, which is a very neat way to get started:
Authentication and Connection
Navigating the Tanzu Hierarchy
The modern API provides cleaner endpoints for navigating your Tanzu environment:
def get_tanzu_structure():
    """Return the Tanzu-enabled clusters and the namespace instances.

    Uses the module-level ``session``, ``headers`` and ``vcenter_server``.

    Returns:
        A ``(clusters, namespaces)`` tuple holding the decoded JSON bodies of
        the two list calls. ``namespaces`` is an empty list when the cluster
        call returns nothing.
    """
    base_url = f'https://{vcenter_server}/api/vcenter'

    # Get Tanzu-enabled clusters
    clusters_url = f'{base_url}/namespace-management/clusters'
    clusters = session.get(clusters_url, headers=headers).json()

    # Get namespaces
    # The instances URL carries no cluster identifier, so it is requested a
    # single time; requesting it once per cluster appended the same response
    # over and over and duplicated every namespace.
    namespaces = []
    if clusters:
        ns_url = f'{base_url}/namespaces/instances'
        namespaces.extend(session.get(ns_url, headers=headers).json())

    return clusters, namespaces
Performance Data Collection
The modern API provides a more streamlined approach to collecting performance metrics:
def get_vm_metrics(vm_id: str, metrics: List[str]):
    """Request real-time statistics for one VM and return the decoded JSON.

    Args:
        vm_id: Identifier of the VM, inserted into the request URL.
        metrics: Metric names sent as the ``metrics`` query parameter.

    Returns:
        The JSON-decoded response body.
    """
    query = {'interval': 'REAL_TIME', 'metrics': metrics}
    stats_endpoint = f'https://{vcenter_server}/api/vcenter/vm/{vm_id}/stats'
    return session.get(stats_endpoint, headers=headers, params=query).json()
# Example collection of key metrics
def collect_performance_data(vm_id: str):
    """Fetch the CPU, memory, disk and network metrics of one VM.

    Thin convenience wrapper that calls ``get_vm_metrics`` with a fixed list
    of metric names and returns its result unchanged.
    """
    return get_vm_metrics(
        vm_id,
        [
            'cpu.utilization',
            'memory.utilization',
            'disk.used',
            'network.received',
            'network.transmitted',
        ],
    )
Error Handling and Retries
Modern implementations should include proper error handling and retry logic:
from tenacity import retry, stop_after_attempt, wait_exponential
@retry(
    wait=wait_exponential(multiplier=1, min=4, max=10),
    stop=stop_after_attempt(3),
)
def get_metrics_with_retry(vm_id: str):
    """Fetch CPU and memory utilization for a VM under the retry decorator.

    A ``requests`` exception is logged and re-raised so that the surrounding
    ``retry`` decorator sees the failure.
    """
    wanted = ['cpu.utilization', 'memory.utilization']
    try:
        result = get_vm_metrics(vm_id, wanted)
    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching metrics for VM {vm_id}: {str(e)}")
        raise
    return result
Batch Operations
For larger environments, batch operations are crucial:
async def _fetch_vm_metrics(session, vm_id, metrics):
    """Fetch the stats of a single VM through the shared aiohttp session."""
    metrics_url = f'https://{vcenter_server}/api/vcenter/vm/{vm_id}/stats'
    # aiohttp takes repeated query keys as a list of pairs, so every metric
    # is sent as its own ``metrics`` entry.
    params = [('interval', 'REAL_TIME')]
    params.extend(('metrics', metric) for metric in metrics)
    async with session.get(metrics_url, headers=headers, params=params) as response:
        return await response.json()


async def get_batch_metrics(vm_ids: List[str], metrics: "Optional[List[str]]" = None):
    """Fetch metrics for several VMs concurrently.

    The previous version handed the synchronous ``get_vm_metrics`` to
    ``asyncio.gather`` with ``(session, vm_id)`` as arguments; that call
    neither matches its ``(vm_id, metrics)`` signature nor yields an
    awaitable. Each VM is now requested by a real coroutine.

    Uses the module-level ``vcenter_server`` and ``headers``.

    Args:
        vm_ids: Identifiers of the VMs to query.
        metrics: Metric names to request; defaults to CPU, memory, disk and
            network metrics.

    Returns:
        A list with one decoded JSON body per VM, in the order of ``vm_ids``.
    """
    vm_ids = list(vm_ids)
    if not vm_ids:
        # Nothing to fetch: do not open a client session at all.
        return []

    if metrics is None:
        metrics = [
            'cpu.utilization',
            'memory.utilization',
            'disk.used',
            'network.received',
            'network.transmitted',
        ]

    async with aiohttp.ClientSession() as session:
        tasks = [
            _fetch_vm_metrics(session, vm_id, metrics)
            for vm_id in vm_ids
        ]
        return await asyncio.gather(*tasks)
Production Considerations
- Certificate Management:
def create_secure_session():
    """Build a ``requests`` session whose ``verify`` points at a certificate file.

    Returns:
        The new session, with ``verify`` set to the certificate path.
    """
    secure_session = requests.Session()
    secure_session.verify = "/path/to/certificate.pem"
    return secure_session
- Token Management:
# Moment after which the current token is treated as expired; ``None`` until
# manage_token() has authenticated once.
_token_expiry = None


def manage_token(lifetime_minutes: int = 30):
    """Re-authenticate when the current token is missing or past its lifetime.

    The previous version recomputed the expiry as "now + 30 minutes" on every
    call and then compared it with "now", so the refresh branch could never
    run; the refreshed ``session``/``headers`` were also dropped as locals.
    The expiry is now kept between calls and the module-level ``session`` and
    ``headers`` are replaced on refresh.

    Args:
        lifetime_minutes: How long a token is considered valid after
            ``authenticate()`` returns.
            NOTE(review): 30 minutes is the value the original code assumed;
            confirm it against the vCenter session timeout.
    """
    global session, headers, _token_expiry

    now = datetime.now()
    if _token_expiry is not None and now < _token_expiry:
        return

    session, headers = authenticate()
    _token_expiry = now + timedelta(minutes=lifetime_minutes)
- Resource Cleanup:
def cleanup_session():
    """Delete the API session on the server, then close the local session.

    A ``requests`` failure while deleting is only logged as a warning; the
    local session is closed in every case.
    """
    try:
        session_endpoint = f'https://{vcenter_server}/api/session'
        session.delete(session_endpoint)
    except requests.exceptions.RequestException:
        logger.warning("Failed to cleanup session")
    finally:
        session.close()
Best Practices for Implementation
- Use Environment Variables:
# Read the connection settings from the environment; a missing variable
# yields None.
vcenter_server, vcenter_user, vcenter_password = (
    os.getenv(name)
    for name in ('VCENTER_SERVER', 'VCENTER_USER', 'VCENTER_PASSWORD')
)
- Implement Logging:
import logging
from functools import wraps

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def log_api_call(func):
    """Decorator that logs the wrapped function's name before each call.

    ``wraps`` is imported alongside ``logging`` here; without that import the
    decorator raised NameError as soon as it was applied.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that logs ``Calling <name>`` at INFO level and then returns
        whatever ``func`` returns. The wrapper keeps ``func``'s metadata.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        logger.info(f"Calling {func.__name__}")
        return func(*args, **kwargs)
    return wrapper
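The full script
Note that this complete example connects with pyVmomi (the Python SDK for the vSphere Web Services API) rather than with the REST endpoints shown above: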
import requests
import urllib3
import os
import sys
from datetime import datetime, timedelta
from typing import Dict, Optional
from pyVim import connect
from pyVmomi import vim
import ssl
# Disable SSL warnings
# Silences urllib3's InsecureRequestWarning for the whole process.
# NOTE(review): the visible script connects through pyVmomi with its own SSL
# context and makes no requests/urllib3 calls, so this may be unnecessary —
# confirm before keeping it.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
class VSphereMonitor:
    """Collect VM performance metrics from vCenter through pyVmomi.

    Typical use: ``connect()``, then ``get_first_vm()`` and
    ``get_performance_metrics()``, and finally ``cleanup()``.
    """

    # Performance counter IDs requested by get_performance_metrics(); the
    # original comment labels them CPU, memory, disk and network usage.
    # NOTE(review): these IDs are assumed to match the target vCenter —
    # confirm against ``perfManager.perfCounter``.
    COUNTER_IDS = (2, 24, 125, 143)

    def __init__(self, vcenter_server: str, vcenter_user: str, vcenter_password: str,
                 ca_file: Optional[str] = None):
        """Store the connection settings; no network access happens here.

        Args:
            vcenter_server: Host name or address passed to ``SmartConnect``.
            vcenter_user: User name for the login.
            vcenter_password: Password for the login.
            ca_file: Optional path to a CA bundle. When given, the server
                certificate and host name are verified against it; when
                omitted, verification stays disabled as before.
        """
        self.vcenter_server = vcenter_server
        self.vcenter_user = vcenter_user
        self.vcenter_password = vcenter_password
        self.ca_file = ca_file
        self.si = None
        self.content = None

    def _build_ssl_context(self) -> ssl.SSLContext:
        """Return the TLS context used by ``connect()`` (TLS 1.2 or newer)."""
        if self.ca_file:
            context = ssl.create_default_context(cafile=self.ca_file)
        else:
            context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
            # check_hostname must be switched off before verify_mode can be
            # set to CERT_NONE.
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        context.minimum_version = ssl.TLSVersion.TLSv1_2
        return context

    def connect(self) -> bool:
        """Connect to vSphere using pyVmomi.

        Returns:
            True on success; False when any exception occurred (the error is
            printed, not raised).
        """
        try:
            self.si = connect.SmartConnect(
                host=self.vcenter_server,
                user=self.vcenter_user,
                pwd=self.vcenter_password,
                sslContext=self._build_ssl_context()
            )
            self.content = self.si.RetrieveContent()
            print("✅ Successfully connected to vSphere")
            return True
        except Exception as e:
            print(f"❌ Connection failed: {str(e)}")
            return False

    def get_first_vm(self) -> "Optional[vim.VirtualMachine]":
        """Get the first VM found in the inventory.

        Returns:
            The first VM of a recursive container view over the root folder,
            or None when the view is empty.
        """
        # Bound before the try block so the finally clause can always test
        # it; otherwise a failing CreateContainerView would be masked by an
        # UnboundLocalError.
        container = None
        try:
            container = self.content.viewManager.CreateContainerView(
                self.content.rootFolder, [vim.VirtualMachine], True
            )
            vms = container.view
            if vms:
                print(f"✅ Found VM: {vms[0].name}")
                return vms[0]
            print("❌ No VMs found in inventory")
            return None
        finally:
            if container is not None:
                container.Destroy()

    @staticmethod
    def _format_value(value, unit_key: str):
        """Format a raw counter value according to its unit key.

        Percent counters are reported in hundredths of a percent, and kilobyte
        based counters are converted to megabytes. Values with any other unit
        are returned unchanged.
        """
        if unit_key == "percent":
            return f"{value / 100:.2f}%"
        if unit_key == "kiloBytes":
            return f"{value/1024:.2f} MB"
        if unit_key == "kiloBytesPerSecond":
            return f"{value/1024:.2f} MB/s"
        return value

    def get_performance_metrics(self, vm: "vim.VirtualMachine") -> Dict:
        """Get VM performance metrics.

        Queries one sample per counter in ``COUNTER_IDS`` over the last five
        minutes at interval 20.

        Args:
            vm: The virtual machine to query.

        Returns:
            ``{group: {"<counter name> (<instance>)": value}}``; an empty
            dict when the query returns no statistics.
        """
        perf_manager = self.content.perfManager

        # Define metrics to collect
        metric_ids = [
            vim.PerformanceManager.MetricId(counterId=counter_id)
            for counter_id in self.COUNTER_IDS
        ]

        # Query spec for last 5 minutes
        end_time = datetime.now()
        start_time = end_time - timedelta(minutes=5)
        query = vim.PerformanceManager.QuerySpec(
            maxSample=1,
            entity=vm,
            metricId=metric_ids,
            startTime=start_time,
            endTime=end_time,
            intervalId=20
        )

        # Get the stats
        stats = perf_manager.QueryStats(querySpec=[query])
        if not stats:
            return {}

        # Process results
        metrics = {}
        counter_info = {c.key: c for c in perf_manager.perfCounter}
        for val in stats[0].value:
            counter = counter_info[val.id.counterId]
            instance = val.id.instance if val.id.instance else "total"
            raw_value = val.value[0] if val.value else 0
            # Match on the unit's key (for example "percent"): the label is a
            # display string and never equals these identifiers.
            value = self._format_value(raw_value, counter.unitInfo.key)
            group_metrics = metrics.setdefault(counter.groupInfo.key, {})
            group_metrics[f"{counter.nameInfo.key} ({instance})"] = value
        return metrics

    def cleanup(self):
        """Disconnect from vSphere; does nothing when not connected."""
        if self.si:
            connect.Disconnect(self.si)
            # Drop the stale handles so a second cleanup() is a no-op.
            self.si = None
            self.content = None
            print("✅ Successfully disconnected from vSphere")
def main():
    """Print the performance metrics of the first VM in the inventory.

    Reads VCENTER_SERVER, VCENTER_USER and VCENTER_PASSWORD from the
    environment. Exits with status 1 when a variable is missing, the
    connection fails, no VM is found, or an unexpected error occurs. The
    monitor is always cleaned up once it has been created.
    """
    # Get environment variables
    vcenter_server = os.environ.get('VCENTER_SERVER')
    vcenter_user = os.environ.get('VCENTER_USER')
    vcenter_password = os.environ.get('VCENTER_PASSWORD')

    # Validate environment variables
    if not all([vcenter_server, vcenter_user, vcenter_password]):
        print("❌ Please set all required environment variables:")
        print(" VCENTER_SERVER, VCENTER_USER, VCENTER_PASSWORD")
        sys.exit(1)

    # Create monitor instance
    monitor = VSphereMonitor(vcenter_server, vcenter_user, vcenter_password)
    try:
        # Connect to vSphere
        if not monitor.connect():
            sys.exit(1)

        # Get first VM
        vm = monitor.get_first_vm()
        if not vm:
            sys.exit(1)

        # Get performance metrics
        print("\nCollecting Performance Metrics...")
        metrics = monitor.get_performance_metrics(vm)
        if metrics:
            print("\nPerformance Metrics:")
            print("-" * 50)
            for category, values in metrics.items():
                print(f"\n{category.upper()}:")
                for metric, value in values.items():
                    print(f" {metric}: {value}")
        else:
            print("❌ No performance metrics available")
    except Exception as e:
        print(f"❌ Unexpected error: {str(e)}")
        # Report the failure to the caller; previously the script printed the
        # error and still exited with status 0.
        sys.exit(1)
    finally:
        monitor.cleanup()


if __name__ == "__main__":
    main()
Conclusion
The modern vSphere REST API provides a more robust and secure way to interact with vSphere with Tanzu environments. Key benefits include:
- Better security with token-based authentication
- More consistent API endpoints
- Better support for modern development practices
- Improved error handling and response formats
Remember to:
- Always handle errors appropriately
- Implement proper certificate validation in production
- Use rate limiting for large-scale operations
- Clean up resources properly