nix-ai/modules/hydra.nix
2024-10-27 22:49:57 +01:00

231 lines
7.1 KiB
Nix

{ config, lib, pkgs, ... }:
let
# Shorthand for this module's option values (services.hydra.*).
cfg = config.services.hydra;
# True when the evaluated configuration has a `services.atticd` attribute
# (presumably meaning an atticd NixOS module is imported — verify).
hasAtticd = config.services ? atticd;
# Submodule describing one AI server that is registered as a remote Nix
# build machine. It is used as the element type of `services.hydra.aiServers`.
aiServer = { lib, ... }: {
  options = {
    # Declared explicitly instead of via mkEnableOption, because
    # mkEnableOption prefixes its argument with "Whether to enable", which
    # produced a garbled description here.
    buildServer = lib.mkOption {
      type = lib.types.bool;
      default = false;
      example = true;
      description = "Whether to also build on the server";
    };
    sshUser = lib.mkOption {
      type = lib.types.nullOr lib.types.str;
      default = null;
      description = "The SSH user to use for the AI server";
    };
    sshKey = lib.mkOption {
      type = lib.types.nullOr lib.types.str;
      default = null;
      description = "The SSH key file to use for the AI server";
    };
    publicHostKey = lib.mkOption {
      type = lib.types.nullOr lib.types.str;
      default = null;
      description = ''
        The (base64-encoded) public host key of this builder. The field
        is calculated via {command}`base64 -w0 /etc/ssh/ssh_host_type_key.pub`.
        If null, SSH will use its regular known-hosts file when connecting.
      '';
    };
    # An empty string means "no GPU type"; consumers compare against "".
    gpuType = lib.mkOption {
      type = lib.types.str;
      default = "";
      example = "A6000-1";
      description = "The type of GPU to use for the AI server";
    };
    speedFactor = lib.mkOption {
      type = lib.types.ints.unsigned;
      default = 8;
      description = "The speed factor of the AI server";
    };
  };
};
in
{
# Option declarations added under `services.hydra` by this module.
options = {
  services.hydra = {
    # mkEnableOption renders its argument as "Whether to enable <text>.",
    # so the texts below are phrased as noun phrases.
    enforceGpuUsage = lib.mkEnableOption "enforced GPU usage";
    openFirewall = lib.mkEnableOption "opening the firewall ports for Hydra and Attic";
    buildService = {
      enable = lib.mkEnableOption "the build service";
      # No default: reading this option without defining it is an evaluation error.
      sshKey = lib.mkOption {
        type = lib.types.str;
        description = "The SSH key file to use for the build service";
      };
    };
    aiServers = lib.mkOption {
      type = lib.types.attrsOf (lib.types.submodule aiServer);
      default = { };
      description = "Attrset of AI servers to use for building";
    };
    atticd = {
      enable = lib.mkEnableOption "the atticd service";
      autoWatchStore = lib.mkOption {
        type = lib.types.bool;
        default = true;
        description = "Systemd service for updating store entries";
      };
      domain = lib.mkOption {
        type = lib.types.nullOr lib.types.str;
        default = "localhost:${toString cfg.atticd.port}";
        # The default depends on another option's value, so give the
        # documentation generator a literal expression to display instead.
        defaultText = lib.literalExpression ''"localhost:''${toString config.services.hydra.atticd.port}"'';
        example = "attic.wavelens.io";
        description = "The domain name of the atticd service";
      };
      port = lib.mkOption {
        # types.port restricts the value to the valid 16-bit port range.
        type = lib.types.port;
        default = 8183;
        description = "The port on which atticd listens";
      };
      credentialsFile = lib.mkOption {
        type = lib.types.nullOr lib.types.path;
        default = null;
        description = "The path to the atticd credentials file";
      };
    };
  };
};
config = lib.mkIf cfg.enable {
# When openFirewall is set, allow Hydra's port and, if atticd is enabled,
# the atticd port as well.
networking.firewall.allowedTCPPorts =
  let
    hydraPorts = [ cfg.port ];
    atticPorts = lib.optional cfg.atticd.enable cfg.atticd.port;
  in
  lib.mkIf cfg.openFirewall (hydraPorts ++ atticPorts);
# Nix configuration: extra nix.conf options, binary cache settings, and the
# remote build machines derived from `aiServers` and `buildService`.
nix =
  let
    # Build-machine entry for one `aiServers` attribute; the attribute name
    # is used as the host name.
    mkAiMachine = hostName: machine: {
      inherit hostName;
      inherit (machine) sshUser sshKey publicHostKey speedFactor;
      system = "x86_64-linux";
      protocol = "ssh";
      maxJobs = 1;
      # With enforceGpuUsage, only jobs that require "cuda" are scheduled here.
      mandatoryFeatures = lib.optional cfg.enforceGpuUsage "cuda";
      supportedFeatures =
        [ "cuda" "service" "ca-derivations" ]
        ++ lib.optionals machine.buildServer [ "nixos-test" "benchmark" "big-parallel" ]
        ++ lib.optional (machine.gpuType != "") "gpu-${machine.gpuType}";
    };

    # Variants offered by the build service; "CPU" is the GPU-less variant.
    serviceGpus = [ "CPU" "A4000-1" "L40-1" "A6000-1" "A6000-4" "A100-1" "A100-2" "A100-4" "A100-8" "H100-1" "H100-2" "H100-4" "H100-8" ];

    # Build-machine entry for one build-service variant.
    # NOTE(review): `cfg.buildService.sshKey` is declared as an option but is
    # not passed to these machines — confirm whether that is intended.
    mkServiceMachine = gpu:
      let
        usesGpu = gpu != "CPU";
      in
      {
        hostName = "WVLS-${gpu}";
        system = "x86_64-linux";
        protocol = "ssh";
        speedFactor = 1;
        maxJobs = 100;
        mandatoryFeatures = [ "service" ] ++ lib.optional usesGpu "cuda";
        supportedFeatures = [ "service" "ca-derivations" ]
          ++ lib.optionals usesGpu [ "cuda" "gpu-${gpu}" ];
      };
  in
  {
    extraOptions = ''
      allowed-uris = github: gitlab: git+https:// git+ssh:// https://
      builders-use-substitutes = true
    '';
    settings = {
      experimental-features = [ "nix-command" "flakes" "ca-derivations" ];
      substituters = [ "https://attic.wavelens.io/main?priority=5&want-mass-query=true" ];
      trusted-public-keys = [ "main:3VVGDhOgY/x5hn7XIkVhqjEjHvOnU7o1cPlrWv91Mko=" ];
    };
    # AI servers first, then (only if enabled) the build-service machines.
    buildMachines =
      lib.mapAttrsToList mkAiMachine cfg.aiServers
      ++ lib.optionals cfg.buildService.enable (map mkServiceMachine serviceGpus);
  };
# Install the Attic server and client packages when atticd is enabled.
environment.systemPackages = lib.mkIf cfg.atticd.enable [
  pkgs.attic-server
  pkgs.attic-client
];
# Long-running unit that runs `attic watch-store main`. It is only defined
# when both atticd and autoWatchStore are enabled.
systemd.services.attic-watch-store =
  let
    # Units this service is ordered after and also requires.
    dependencies = [
      "network-online.target"
      "atticd.service"
    ];
  in
  lib.mkIf (cfg.atticd.enable && cfg.atticd.autoWatchStore) {
    description = "Upload all store content to binary cache";
    wantedBy = [ "multi-user.target" ];
    after = dependencies;
    requires = dependencies;
    serviceConfig = {
      User = "root";
      Restart = "always";
      # The `attic` CLI is shipped by attic-client; attic-server provides
      # the daemon binaries, so the path must point at the client package.
      ExecStart = "${pkgs.attic-client}/bin/attic watch-store main";
    };
  };
# Service configuration: Hydra defaults, the PostgreSQL instance (with the
# atticd database/user when atticd is enabled), and atticd itself.
services = lib.mkMerge [
  {
    hydra = {
      # mkDefault lets the importing configuration override any of these.
      useSubstitutes = lib.mkDefault true;
      minimumDiskFree = lib.mkDefault 5;
      minimumDiskFreeEvaluator = lib.mkDefault 10;
      port = lib.mkDefault 4444;
      hydraURL = lib.mkDefault "http://0.0.0.0:${toString cfg.port}";
      notificationSender = lib.mkDefault "hydra@example.local";
      extraConfig = ''
        max_output_size = ${toString (10 * 1024 * 1024 * 1024)}
        <git-input>
        timeout = 3600
        </git-input>
      '';
    };
    postgresql = {
      enable = true;
      ensureDatabases = lib.optional cfg.atticd.enable "atticd";
      identMap = ''
        # ArbitraryMapName systemUser DBUser
        superuser_map root postgres
        # Let other names login as themselves
        superuser_map /^(.*)$ \1
      '';
      ensureUsers = lib.optional cfg.atticd.enable {
        name = "atticd";
        ensureDBOwnership = true;
      };
    };
  }
  # Only reference `services.atticd` when that option tree exists.
  (lib.optionalAttrs hasAtticd {
    # Gate on the module's own switch: the database, database user, firewall
    # port and packages are all conditional on cfg.atticd.enable, so atticd
    # must not be started without them.
    atticd = lib.mkIf cfg.atticd.enable {
      enable = true;
      environmentFile = cfg.atticd.credentialsFile;
      settings = {
        listen = "0.0.0.0:${toString cfg.atticd.port}";
        allowed-hosts = lib.optional (cfg.atticd.domain != null) cfg.atticd.domain;
        # `domain` may be null; interpolating null would abort evaluation,
        # so the endpoint is only set when a domain is configured.
        api-endpoint = lib.mkIf (cfg.atticd.domain != null) "https://${cfg.atticd.domain}";
        database = {
          url = "postgres://atticd?host=/run/postgresql";
          heartbeat = true;
        };
        chunking = {
          nar-size-threshold = 64 * 1024; # 64 KiB
          min-size = 16 * 1024; # 16 KiB
          avg-size = 64 * 1024; # 64 KiB
          max-size = 256 * 1024; # 256 KiB
        };
      };
    };
  })
];
};
}