From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from bombadil.infradead.org (bombadil.infradead.org [198.137.202.133]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 121C130C632 for ; Wed, 27 Aug 2025 21:29:04 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.137.202.133 ARC-Seal:i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1756330147; cv=none; b=ikGG+lL2RmtDD5jIn3TSn3lMMyYMSNdMVTBHNZol0RYOWsSJP7/Oe1bYa0u3xVj/IkgwD3YUlyoDtQVAg78cg7lYqaW5k2+umU3Dsl66jxqSaGhphrm2+UgmZvxU75V3VFj69KUnhcG4NAyxrdG/hIedj73bwMCzzoe83M6BWvI= ARC-Message-Signature:i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1756330147; c=relaxed/simple; bh=cXWADD9MdtCowIe919U/63GjNcyG3+hUAOJ8OOjCnJk=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=eDABB++QtqkfBR4WLdTMZ6MTuNlckn40nqTaSHNkoSDzaRRYVLee6fFmKzfO+VpQa7gn//BT+zaQ8F03tabYMCWl0zUV8j0v/5A5Sb8otbAboOcIfyeDydV4mNEWc7c5aClI8LPWWNxbiQMZPL64uu9LQo+nxWlnX+gjpSkP6fY= ARC-Authentication-Results:i=1; smtp.subspace.kernel.org; dmarc=fail (p=quarantine dis=none) header.from=kernel.org; spf=none smtp.mailfrom=infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=sAsZxsF0; arc=none smtp.client-ip=198.137.202.133 Authentication-Results: smtp.subspace.kernel.org; dmarc=fail (p=quarantine dis=none) header.from=kernel.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="sAsZxsF0" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=bombadil.20210309; h=Sender:Content-Transfer-Encoding: Content-Type:MIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc: To:From:Reply-To:Content-ID:Content-Description; 
bh=qLRZuAhh7zPdxJU6CxrCBdrvMti/Zriswv/3hFSJiIA=; b=sAsZxsF0XdNHJ1noMf3MQMTykJ OXDCRXsWSZMax7gfZDEFtFAiZ96Q428GOUzkd/LbzCJcs+nFXX4/WwHiaeHtIsib29ml9CJD5mebt XhX18ymETGc/Sk+0CMModyev/56BgI9yRNUCOXEll67rojtelqliA4/pE8lZOUSZObkC+V6QQPoqg PdmMRFkTQVJxKdZnkBtq2CcWsaGt8291rNQmh0raBQbRveRB9kulzSxrJHIiPM2qCID7s8zikY5N3 ODDqyhodAO6MzEoojWmMwZQdHVuYbJy+r9KuMFij1C6U0v4kS1INeK4U/+RKOclGNopfDqhztezBO 270wh8WQ==; Received: from mcgrof by bombadil.infradead.org with local (Exim 4.98.2 #2 (Red Hat Linux)) id 1urNhY-0000000GsJu-2Gkw; Wed, 27 Aug 2025 21:29:04 +0000 From: Luis Chamberlain To: Chuck Lever , Daniel Gomez , kdevops@lists.linux.dev Cc: Luis Chamberlain Subject: [PATCH v2 08/10] ansible/terraform: integrate Lambda Labs into build system Date: Wed, 27 Aug 2025 14:28:59 -0700 Message-ID: <20250827212902.4021990-9-mcgrof@kernel.org> X-Mailer: git-send-email 2.49.0 In-Reply-To: <20250827212902.4021990-1-mcgrof@kernel.org> References: <20250827212902.4021990-1-mcgrof@kernel.org> Precedence: bulk X-Mailing-List: kdevops@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sender: Luis Chamberlain Wire Lambda Labs into the kdevops build and provisioning system. This adds the necessary Ansible playbook tasks, terraform variable templates, and Makefile targets to support Lambda Labs operations. 
Integration includes: - Terraform tfvars template for Lambda Labs configuration - Ansible defaults for Lambda Labs variables - API key validation in terraform workflow - Capacity checking before provisioning - SSH key management in Makefile targets - Per-directory SSH key isolation in Kconfig - Updated shared terraform configuration The integration ensures: - API keys are validated before provisioning attempts - Capacity is checked to avoid failed provisions - SSH keys are managed automatically when configured - Each kdevops directory uses unique SSH keys Generated-by: Claude AI Signed-off-by: Luis Chamberlain --- playbooks/roles/gen_tfvars/defaults/main.yml | 23 ++++ .../templates/lambdalabs/terraform.tfvars.j2 | 18 +++ playbooks/roles/terraform/tasks/main.yml | 71 ++++++++++++ scripts/terraform.Makefile | 108 +++++++++++++++++- terraform/Kconfig.ssh | 37 +++++- terraform/shared.tf | 14 ++- 6 files changed, 259 insertions(+), 12 deletions(-) create mode 100644 playbooks/roles/gen_tfvars/templates/lambdalabs/terraform.tfvars.j2 diff --git a/playbooks/roles/gen_tfvars/defaults/main.yml b/playbooks/roles/gen_tfvars/defaults/main.yml index fce7afd..c9e531b 100644 --- a/playbooks/roles/gen_tfvars/defaults/main.yml +++ b/playbooks/roles/gen_tfvars/defaults/main.yml @@ -17,6 +17,17 @@ terraform_private_net_enabled: "false" terraform_private_net_prefix: "" terraform_private_net_mask: 0 +# AWS defaults - these prevent undefined variable errors when AWS is not selected +terraform_aws_profile: "default" +terraform_aws_region: "us-west-1" +terraform_aws_av_zone: "us-west-1c" +terraform_aws_ns: "debian-12" +terraform_aws_ami_owner: "136693071363" +terraform_aws_instance_type: "t2.micro" +terraform_aws_ebs_volumes_per_instance: "0" +terraform_aws_ebs_volume_size: 0 +terraform_aws_ebs_volume_type: "gp3" + terraform_oci_assign_public_ip: false terraform_oci_use_existing_vcn: false @@ -25,3 +36,15 @@ terraform_openstack_instance_prefix: "invalid" terraform_openstack_flavor: 
"invalid" terraform_openstack_image_name: "invalid" terraform_openstack_ssh_pubkey_name: "invalid" + +# Lambda Labs defaults +terraform_lambdalabs_region: "us-west-1" +terraform_lambdalabs_instance_type: "gpu_1x_a10" +terraform_lambdalabs_ssh_key_name: "kdevops-lambdalabs" +terraform_lambdalabs_image: "ubuntu-22.04" +terraform_lambdalabs_persistent_storage: false +terraform_lambdalabs_persistent_storage_size: 100 + +# SSH config defaults for templates +sshconfig: "~/.ssh/config" +sshconfig_fname: "~/.ssh/config" diff --git a/playbooks/roles/gen_tfvars/templates/lambdalabs/terraform.tfvars.j2 b/playbooks/roles/gen_tfvars/templates/lambdalabs/terraform.tfvars.j2 new file mode 100644 index 0000000..4fd8cad --- /dev/null +++ b/playbooks/roles/gen_tfvars/templates/lambdalabs/terraform.tfvars.j2 @@ -0,0 +1,18 @@ +lambdalabs_region = "{{ terraform_lambdalabs_region }}" +lambdalabs_instance_type = "{{ terraform_lambdalabs_instance_type }}" +lambdalabs_ssh_key_name = "{{ terraform_lambdalabs_ssh_key_name }}" +# Lambda Labs doesn't support OS image selection - always uses Ubuntu 22.04 + +ssh_config_pubkey_file = "{{ kdevops_terraform_ssh_config_pubkey_file }}" +ssh_config_privkey_file = "{{ kdevops_terraform_ssh_config_privkey_file }}" +ssh_config_user = "{{ kdevops_terraform_ssh_config_user }}" +ssh_config = "{{ sshconfig }}" +# Use unique SSH config file per directory to avoid conflicts +ssh_config_name = "{{ kdevops_ssh_config_prefix }}{{ topdir_path_sha256sum[:8] }}" + +ssh_config_update = {{ kdevops_terraform_ssh_config_update | lower }} +ssh_config_use_strict_settings = {{ kdevops_terraform_ssh_config_update_strict | lower }} +ssh_config_backup = {{ kdevops_terraform_ssh_config_update_backup | lower }} + +# Lambda Labs doesn't support extra storage volumes +# These lines are removed as the provider doesn't support this feature diff --git a/playbooks/roles/terraform/tasks/main.yml b/playbooks/roles/terraform/tasks/main.yml index a64c93c..a4dcbab 100644 --- 
a/playbooks/roles/terraform/tasks/main.yml +++ b/playbooks/roles/terraform/tasks/main.yml @@ -1,4 +1,75 @@ --- +- name: Check Lambda Labs API key configuration (if using Lambda Labs) + ansible.builtin.command: + cmd: "python3 {{ topdir_path }}/scripts/lambdalabs_credentials.py check" + register: api_key_check + failed_when: false + changed_when: false + when: + - kdevops_terraform_provider == "lambdalabs" + tags: + - bringup + - destroy + - status + +- name: Report Lambda Labs API key configuration status + ansible.builtin.fail: + msg: | + ERROR: Lambda Labs API key is not configured! + + To fix this, configure your Lambda Labs API key using one of these methods: + + Use the kdevops credentials management tool: + python3 scripts/lambdalabs_credentials.py set 'your-actual-api-key-here' + + Or manually create the credentials file: + mkdir -p ~/.lambdalabs + echo "[default]" > ~/.lambdalabs/credentials + echo "lambdalabs_api_key=your-actual-api-key-here" >> ~/.lambdalabs/credentials + chmod 600 ~/.lambdalabs/credentials + + Get your API key from: https://cloud.lambdalabs.com + when: + - kdevops_terraform_provider == "lambdalabs" + - api_key_check.rc != 0 + tags: + - bringup + - destroy + - status + +- name: Display Lambda Labs API key configuration status + ansible.builtin.debug: + msg: "{{ api_key_check.stdout }}" + when: + - kdevops_terraform_provider == "lambdalabs" + - api_key_check.rc == 0 + tags: + - bringup + - destroy + - status + +- name: Check Lambda Labs capacity before provisioning (if using Lambda Labs) + ansible.builtin.command: + cmd: "python3 {{ topdir_path }}/scripts/check_lambdalabs_capacity.py {{ terraform_lambdalabs_instance_type }} {{ terraform_lambdalabs_region }}" + register: capacity_check + failed_when: false + changed_when: false + when: + - kdevops_terraform_provider == "lambdalabs" + tags: + - bringup + +- name: Report Lambda Labs capacity check result + ansible.builtin.fail: + msg: "{{ capacity_check.stdout }}" + when: + - 
kdevops_terraform_provider == "lambdalabs" + - capacity_check.rc != 0 + tags: + - bringup + +# No longer needed - terraform reads directly from credentials file + - name: Bring up terraform resources cloud.terraform.terraform: force_init: true diff --git a/scripts/terraform.Makefile b/scripts/terraform.Makefile index 98a85e5..d1411a1 100644 --- a/scripts/terraform.Makefile +++ b/scripts/terraform.Makefile @@ -21,6 +21,9 @@ endif ifeq (y,$(CONFIG_TERRAFORM_OPENSTACK)) export KDEVOPS_CLOUD_PROVIDER=openstack endif +ifeq (y,$(CONFIG_TERRAFORM_LAMBDALABS)) +export KDEVOPS_CLOUD_PROVIDER=lambdalabs +endif KDEVOPS_NODES_TEMPLATE := $(KDEVOPS_NODES_ROLE_TEMPLATE_DIR)/terraform_nodes.tf.j2 KDEVOPS_NODES := terraform/$(KDEVOPS_CLOUD_PROVIDER)/nodes.tf @@ -99,7 +102,106 @@ endif # CONFIG_TERRAFORM_SSH_CONFIG_GENKEY ANSIBLE_EXTRA_ARGS += $(TERRAFORM_EXTRA_VARS) -bringup_terraform: +# Lambda Labs SSH key management +ifeq (y,$(CONFIG_TERRAFORM_LAMBDALABS)) + +LAMBDALABS_SSH_KEY_NAME := $(subst ",,$(CONFIG_TERRAFORM_LAMBDALABS_SSH_KEY_NAME)) + +ifeq (y,$(CONFIG_TERRAFORM_LAMBDALABS_SSH_KEY_AUTO_CREATE)) +# Auto-create mode: Always ensure key exists and create if missing +lambdalabs-ssh-check: $(KDEVOPS_SSH_PUBKEY) + @echo "Lambda Labs SSH key setup (auto-create mode)..." 
+ @echo "Using SSH key name: $(LAMBDALABS_SSH_KEY_NAME)" + @if python3 scripts/lambdalabs_ssh_keys.py check "$(LAMBDALABS_SSH_KEY_NAME)" 2>/dev/null; then \ + echo "✓ SSH key already exists in Lambda Labs"; \ + else \ + echo "Creating new SSH key in Lambda Labs..."; \ + if python3 scripts/lambdalabs_ssh_keys.py add "$(LAMBDALABS_SSH_KEY_NAME)" "$(KDEVOPS_SSH_PUBKEY)"; then \ + echo "✓ Successfully created SSH key '$(LAMBDALABS_SSH_KEY_NAME)'"; \ + else \ + echo "========================================================"; \ + echo "ERROR: Could not create SSH key automatically"; \ + echo "========================================================"; \ + echo "Please check your Lambda Labs API key configuration:"; \ + echo " cat ~/.lambdalabs/credentials"; \ + echo ""; \ + echo "Or add the key manually:"; \ + echo "1. Go to: https://cloud.lambdalabs.com/ssh-keys"; \ + echo "2. Click 'Add SSH key'"; \ + echo "3. Name it: $(LAMBDALABS_SSH_KEY_NAME)"; \ + echo "4. Paste content from: $(KDEVOPS_SSH_PUBKEY)"; \ + echo "========================================================"; \ + exit 1; \ + fi \ + fi +else +# Manual mode: Just check if key exists +lambdalabs-ssh-check: $(KDEVOPS_SSH_PUBKEY) + @echo "Lambda Labs SSH key setup (manual mode)..." + @echo "Checking for SSH key: $(LAMBDALABS_SSH_KEY_NAME)" + @if python3 scripts/lambdalabs_ssh_keys.py check "$(LAMBDALABS_SSH_KEY_NAME)" 2>/dev/null; then \ + echo "✓ SSH key exists in Lambda Labs"; \ + else \ + echo "========================================================"; \ + echo "ERROR: SSH key not found"; \ + echo "========================================================"; \ + echo "The SSH key '$(LAMBDALABS_SSH_KEY_NAME)' does not exist."; \ + echo ""; \ + echo "Please add your SSH key manually:"; \ + echo "1. Go to: https://cloud.lambdalabs.com/ssh-keys"; \ + echo "2. Click 'Add SSH key'"; \ + echo "3. Name it: $(LAMBDALABS_SSH_KEY_NAME)"; \ + echo "4. 
Paste content from: $(KDEVOPS_SSH_PUBKEY)"; \ + echo "========================================================"; \ + exit 1; \ + fi +endif + +lambdalabs-ssh-setup: $(KDEVOPS_SSH_PUBKEY) + @echo "Setting up Lambda Labs SSH key..." + @python3 scripts/lambdalabs_ssh_keys.py add "$(LAMBDALABS_SSH_KEY_NAME)" "$(KDEVOPS_SSH_PUBKEY)" || true + @python3 scripts/lambdalabs_ssh_keys.py list + +lambdalabs-ssh-list: + @echo "Current Lambda Labs SSH keys:" + @python3 scripts/lambdalabs_ssh_keys.py list + +lambdalabs-ssh-clean: +ifeq (y,$(CONFIG_TERRAFORM_LAMBDALABS_SSH_KEY_AUTO_CREATE)) + @echo "Cleaning up auto-created SSH key '$(LAMBDALABS_SSH_KEY_NAME)'..." + @if python3 scripts/lambdalabs_ssh_keys.py check "$(LAMBDALABS_SSH_KEY_NAME)" 2>/dev/null; then \ + echo "Removing SSH key from Lambda Labs..."; \ + python3 scripts/lambdalabs_ssh_keys.py delete "$(LAMBDALABS_SSH_KEY_NAME)" || true; \ + else \ + echo "SSH key not found, nothing to clean"; \ + fi +else + @echo "Manual SSH key mode - not removing key '$(LAMBDALABS_SSH_KEY_NAME)'" + @echo "To remove manually, run: python3 scripts/lambdalabs_ssh_keys.py delete $(LAMBDALABS_SSH_KEY_NAME)" +endif + +else +lambdalabs-ssh-check: + @true +lambdalabs-ssh-setup: + @true +lambdalabs-ssh-list: + @echo "Lambda Labs provider not configured" +lambdalabs-ssh-clean: + @true +lambdalabs-ssh-clean-after: + @true +endif + +# Handle cleanup after destroy for Lambda Labs; the target is defined in manual mode too (empty recipe) so destroy_terraform can always resolve it +ifeq (y,$(CONFIG_TERRAFORM_LAMBDALABS)) +lambdalabs-ssh-clean-after: +ifeq (y,$(CONFIG_TERRAFORM_LAMBDALABS_SSH_KEY_AUTO_CREATE)) + @$(MAKE) lambdalabs-ssh-clean +endif +endif + +bringup_terraform: lambdalabs-ssh-check $(Q)ansible-playbook $(ANSIBLE_VERBOSE) \ --inventory localhost, \ playbooks/terraform.yml --tags bringup \ @@ -119,7 +221,9 @@ status_terraform: playbooks/terraform.yml --tags status \ --extra-vars=@./extra_vars.yaml -destroy_terraform: +destroy_terraform: destroy_terraform_base lambdalabs-ssh-clean-after + +destroy_terraform_base: $(Q)ansible-playbook 
$(ANSIBLE_VERBOSE) \ --inventory localhost, \ playbooks/terraform.yml --tags destroy \ diff --git a/terraform/Kconfig.ssh b/terraform/Kconfig.ssh index 1c5e096..8a19d7c 100644 --- a/terraform/Kconfig.ssh +++ b/terraform/Kconfig.ssh @@ -1,26 +1,53 @@ config TERRAFORM_SSH_USER_INFER bool "Selecting this will infer your username from you local system" - default y + default y if !TERRAFORM_LAMBDALABS + default n if TERRAFORM_LAMBDALABS help If enabled we and you are running 'make menuconfig' as user sonia, then we'd infer this and peg sonia as the default user name for you. We'll simply run $(shell echo $USER). + Note: This is automatically disabled for Lambda Labs since they + don't support custom SSH users. + config TERRAFORM_SSH_CONFIG_USER string "The username to create on the target systems" - default $(shell, echo $USER) if TERRAFORM_SSH_USER_INFER - default "admin" if !TERRAFORM_SSH_USER_INFER + default $(shell, echo $USER) if TERRAFORM_SSH_USER_INFER && !TERRAFORM_LAMBDALABS + default "ubuntu" if TERRAFORM_LAMBDALABS + default "admin" if !TERRAFORM_SSH_USER_INFER && !TERRAFORM_LAMBDALABS help - The ssh public key which will be pegged onto the systems's - ~/.ssh/authorized_keys file so you can log in. + The SSH username to use for connecting to the target systems. + + For Lambda Labs, this is set to 'ubuntu' as Lambda Labs doesn't + support custom users and typically deploys Ubuntu instances. + + For other providers, this will be inferred from your local username + or set to a default value. config TERRAFORM_SSH_CONFIG_PUBKEY_FILE string "The ssh public key to use to log in" + default "~/.ssh/kdevops_terraform_$(shell, echo $(TOPDIR_PATH) | sha256sum | cut -c1-8).pub" if TERRAFORM_LAMBDALABS default "~/.ssh/kdevops_terraform.pub" help The ssh public key which will be pegged onto the systems's ~/.ssh/authorized_keys file so you can log in. 
+ For Lambda Labs, the key path is made unique per directory by appending + the directory checksum to avoid conflicts when running multiple kdevops + instances. + +config TERRAFORM_SSH_CONFIG_PRIVKEY_FILE + string "The ssh private key file for authentication" + default "~/.ssh/kdevops_terraform_$(shell, echo $(TOPDIR_PATH) | sha256sum | cut -c1-8)" if TERRAFORM_LAMBDALABS + default "~/.ssh/kdevops_terraform" + help + The ssh private key file used for authenticating to the systems. + This should correspond to the public key specified above. + + For Lambda Labs, the key path is made unique per directory by appending + the directory checksum to avoid conflicts when running multiple kdevops + instances. + config TERRAFORM_SSH_CONFIG_GENKEY bool "Should we create a new random key for you?" default y diff --git a/terraform/shared.tf b/terraform/shared.tf index ff55b20..88e87a2 100644 --- a/terraform/shared.tf +++ b/terraform/shared.tf @@ -4,8 +4,8 @@ # order does not matter as terraform is declarative. variable "ssh_config" { - description = "Path to your ssh_config" - default = "~/.ssh/config" + description = "Path to SSH config update script" + default = "../scripts/update_ssh_config_lambdalabs.py" } variable "ssh_config_update" { @@ -13,11 +13,10 @@ variable "ssh_config_update" { type = bool } -# Debian AWS ami's use admin as the default user, we override it with cloud-init -# for whatever username you set here. 
+# Lambda Labs instances use ubuntu as the default user variable "ssh_config_user" { description = "If ssh_config_update is true, and this is set, it will be the user set for each host on your ssh config" - default = "admin" + default = "ubuntu" } variable "ssh_config_pubkey_file" { @@ -25,6 +24,11 @@ variable "ssh_config_pubkey_file" { default = "~/.ssh/kdevops_terraform.pub" } +variable "ssh_config_privkey_file" { + description = "Path to the ssh private key file for authentication" + default = "~/.ssh/kdevops_terraform" +} + variable "ssh_config_use_strict_settings" { description = "Whether or not to use strict settings on ssh_config" type = bool -- 2.50.1