package server

import (
	"context"
	"errors"
	"fmt"
	"net/http"
	"time"

	"gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v18/internal/api"
	"gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v18/internal/gitlab"
	gapi "gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v18/internal/gitlab/api"
	"gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v18/internal/module/modserver"
	"gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v18/internal/module/runner/rpc"
	runner_controller_rpc "gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v18/internal/module/runner_controller/rpc"
	"gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v18/internal/tool/grpctool"
	"gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v18/internal/tool/httpz"
	"gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v18/internal/tool/logz"
	"go.opentelemetry.io/otel/attribute"
	otelmetric "go.opentelemetry.io/otel/metric"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/metadata"
	"google.golang.org/grpc/status"
	"google.golang.org/protobuf/encoding/protojson"
)

const (
	requestIDMetadataKey = "x-request-id" // gRPC metadata key; lowercase form of the X-Request-ID header name

	// admitJobTimeout is the maximum time to wait for each individual Runner Controller to answer an AdmitJob request.
	admitJobTimeout = 10 * time.Second
	// updateJobTimeout is the maximum time to wait when updating a job in GitLab.
	updateJobTimeout = 10 * time.Second

	// State and failure reason sent to GitLab when a job is failed because it did not pass admission control.
	jobFailedState               = "failed"
	jobRunnerSystemFailureReason = "runner_system_failure"

	// admitJobResultMetricAttributeName is the attribute key that carries the outcome on the admission control counters.
	admitJobResultMetricAttributeName = "result"
)

var (
	// Sentinel errors returned by admitJob for outcomes that still let the job through:
	// admission control is not available on the GitLab instance, or no Runner Controller applies to the job.
	errAdmissionControlDisabled = errors.New("admission control is disabled")
	errNoAdmissionControllers   = errors.New("no admission controllers")

	// Pre-built counter options for the overall, per-job admission outcome (used with admissionControlTotalCounter).
	admissionControlDisabledMetricOptions = []otelmetric.AddOption{otelmetric.WithAttributeSet(attribute.NewSet(attribute.String(admitJobResultMetricAttributeName, "disabled")))}
	admissionControlFailedMetricOptions   = []otelmetric.AddOption{otelmetric.WithAttributeSet(attribute.NewSet(attribute.String(admitJobResultMetricAttributeName, "failed")))}
	admissionControlAdmittedMetricOptions = []otelmetric.AddOption{otelmetric.WithAttributeSet(attribute.NewSet(attribute.String(admitJobResultMetricAttributeName, "admitted")))}
	admissionControlRejectedMetricOptions = []otelmetric.AddOption{otelmetric.WithAttributeSet(attribute.NewSet(attribute.String(admitJobResultMetricAttributeName, "rejected")))}

	// Pre-built counter options for the outcome of each individual Runner Controller request
	// (used with admissionControlRequestsTotalCounter).
	admissionControlRequestFailedMetricOptions   = []otelmetric.AddOption{otelmetric.WithAttributeSet(attribute.NewSet(attribute.String(admitJobResultMetricAttributeName, "failed")))}
	admissionControlRequestAdmittedMetricOptions = []otelmetric.AddOption{otelmetric.WithAttributeSet(attribute.NewSet(attribute.String(admitJobResultMetricAttributeName, "admitted")))}
	admissionControlRequestRejectedMetricOptions = []otelmetric.AddOption{otelmetric.WithAttributeSet(attribute.NewSet(attribute.String(admitJobResultMetricAttributeName, "rejected")))}
)

// notAdmittedError is returned by admitJob when a Runner Controller answered
// the AdmitJob request with a rejection.
//
// runnerController identifies the Runner Controller that rejected the job and
// reason is the rejection reason taken from its response.
type notAdmittedError struct {
	runnerController api.AgentKey
	reason           string
}

// Error implements the error interface with a fixed message. The rejecting
// Runner Controller and its reason are kept in the struct fields rather than
// in the message.
func (e *notAdmittedError) Error() string {
	const notAdmittedMsg = "runner controller did not admit job"
	return notAdmittedMsg
}

// admitJobError is returned by admitJob when a Runner Controller could not be
// asked successfully: the AdmitJob call returned an error (including a
// timeout) or the response had an unexpected type.
//
// runnerController identifies the Runner Controller that was being asked and
// inner is the underlying cause.
type admitJobError struct {
	runnerController api.AgentKey
	inner            error
}

// Error implements the error interface.
//
// The message contains the wrapped cause (when there is one) so that the
// reason for the failure - for example the admission request timeout - is
// visible wherever the error is logged or reported, not only to callers that
// unwrap it.
func (e *admitJobError) Error() string {
	const msg = "runner controller failed to admit job"
	if e.inner == nil {
		return msg
	}
	return msg + ": " + e.inner.Error()
}

// Unwrap returns the underlying cause so that errors.Is and errors.As can
// inspect it through an admitJobError.
func (e *admitJobError) Unwrap() error {
	return e.inner
}

// admissionControllerClient is the client used by admitJob to ask a single
// Runner Controller whether a job may run.
type admissionControllerClient interface {
	// AdmitJob sends req to the Runner Controller identified by agentKey and
	// returns its admission response, or an error if the request failed.
	AdmitJob(ctx context.Context, agentKey api.AgentKey, req *runner_controller_rpc.AdmitJobRequest) (*runner_controller_rpc.AdmitJobResponse, error)
}

// server implements the job router RPCs: it hands job requests from Runners to
// GitLab and runs admission control on the jobs GitLab returns.
//
// Fields:
//   - rpc.UnsafeJobRouterServer: embedded generated type
//     (NOTE(review): presumably opts out of gRPC forward compatibility - confirm).
//   - gitlabClient: client used for all GitLab API calls made by this server.
//   - newAdmissionControllerClient: factory invoked by admitJob to obtain the
//     client used to talk to Runner Controllers.
//   - admissionControlTotalCounter: counts the overall admission outcome per job.
//   - admissionControlRequestsTotalCounter: counts the outcome of each
//     individual Runner Controller request.
type server struct {
	rpc.UnsafeJobRouterServer
	gitlabClient                         gitlab.ClientInterface
	newAdmissionControllerClient         func() admissionControllerClient
	admissionControlTotalCounter         otelmetric.Int64Counter
	admissionControlRequestsTotalCounter otelmetric.Int64Counter
}

// GetJob forwards a Runner's job request to GitLab and, when GitLab hands out
// a job (HTTP 201), runs admission control before returning it to the Runner.
//
// No authentication is done here: the request is passed to GitLab together
// with the Runner-supplied authentication token. An incoming x-request-id
// metadata value is forwarded to GitLab, and the request ID returned by GitLab
// is sent back to the caller as response metadata.
//
// Results:
//   - GitLab request failed: a gRPC status error. HTTP 403 maps to
//     PermissionDenied, 429 to ResourceExhausted, 409/422 to
//     FailedPrecondition and everything else to Unavailable.
//   - GitLab answered with anything but HTTP 201: the raw response body is
//     returned as-is.
//   - Job admitted, no Runner Controllers, or admission control disabled: the
//     raw job response body is returned.
//   - Job rejected or admission control failed: the job is marked as failed in
//     GitLab (best effort) and an empty response with a nil error is returned.
func (s *server) GetJob(ctx context.Context, req *rpc.GetJobRequest) (*rpc.GetJobResponse, error) {
	// NOTE: we don't do any authentication here,
	// we just pass the request to GitLab along with the Runner-supplied authentication token.
	rpcAPI := modserver.AgentRPCAPIFromContext(ctx)
	log := rpcAPI.Log()

	var opts []gitlab.DoOption

	reqID := metadata.ValueFromIncomingContext(ctx, requestIDMetadataKey)
	if len(reqID) > 0 {
		opts = append(opts,
			gitlab.WithHeader(http.Header{
				httpz.RequestIDHeader: reqID,
			}),
		)

		log = rpcAPI.Log().With(logz.RequestID(reqID[0]))
	}

	runnerToken := rpcAPI.AgentTokenWithType().Token
	respStatusCode, respBody, respID, err := gapi.GetRawCIJobs( // handles HTTP 201 and 204
		ctx,
		s.gitlabClient,
		runnerToken,
		req.JobRequest,
		opts...,
	)

	// we always want to set these headers, so we do it before even checking the API errors.
	if respID != "" {
		// Use a dedicated variable: the error from GetRawCIJobs above must not be
		// overwritten, it is evaluated right after this block.
		setHeaderErr := grpc.SetHeader(ctx, metadata.Pairs(
			requestIDMetadataKey, respID,
		))
		if setHeaderErr != nil { // this should never happen
			rpcAPI.HandleProcessingError(log, "Failed to set response metadata", setHeaderErr)
			// continue anyway
		}
	}

	if err != nil {
		code := codes.Unavailable
		var ce *gitlab.ClientError
		if errors.As(err, &ce) {
			switch ce.StatusCode {
			case http.StatusForbidden:
				code = codes.PermissionDenied
			case http.StatusTooManyRequests:
				code = codes.ResourceExhausted
			case http.StatusConflict, http.StatusUnprocessableEntity:
				code = codes.FailedPrecondition
			}
		}
		return nil, status.Error(code, err.Error())
	}

	respBodyBytes := respBody.Materialize()
	respBody.Free() // we can get rid of it.

	if respStatusCode != http.StatusCreated {
		return &rpc.GetJobResponse{
			JobResponse: respBodyBytes,
		}, nil
	}

	// check if we got a job and if so run admission control
	jobResponse := &rpc.JobResponse{}
	// unmarshal respBody into a view into the JobResponse.
	o := protojson.UnmarshalOptions{DiscardUnknown: true} // we only want to read certain fields
	if err = o.Unmarshal(respBodyBytes, jobResponse); err != nil {
		rpcAPI.HandleProcessingError(log, "Failed to unmarshal JobResponse", err)
		return nil, status.Errorf(codes.Unavailable, "Failed to unmarshal JobResponse: %v", err)
	}

	log = log.With(logz.CIJobID(jobResponse.JobId))

	log.Debug("Retrieving runner controllers for job admission")

	// Metrics are recorded with a background context so that they are still
	// counted when the request context is already cancelled.
	err = s.admitJob(ctx, runnerToken, jobResponse, opts)
	switch {
	case err == nil:
		s.admissionControlTotalCounter.Add(context.Background(), 1, admissionControlAdmittedMetricOptions...) //nolint:contextcheck
		log.Debug("Admitted job")
	case errors.Is(err, errNoAdmissionControllers):
		s.admissionControlTotalCounter.Add(context.Background(), 1, admissionControlAdmittedMetricOptions...) //nolint:contextcheck
		// we don't have any admission controllers to check, we admit.
		log.Debug("No runner admission controllers, admitted job")
	case errors.Is(err, errAdmissionControlDisabled):
		s.admissionControlTotalCounter.Add(context.Background(), 1, admissionControlDisabledMetricOptions...) //nolint:contextcheck
		// we just continue and don't log either to prevent spamming non-enterprise instances
	default: // the job was rejected or admission control itself failed
		var notAdmittedErr *notAdmittedError
		var admitJobErr *admitJobError
		switch {
		case errors.As(err, &notAdmittedErr):
			s.admissionControlTotalCounter.Add(context.Background(), 1, admissionControlRejectedMetricOptions...) //nolint:contextcheck
			log.Info("Runner Controller did not admit job", logz.AgentKey(notAdmittedErr.runnerController), logz.Reason(notAdmittedErr.reason))
		case errors.As(err, &admitJobErr):
			s.admissionControlTotalCounter.Add(context.Background(), 1, admissionControlFailedMetricOptions...) //nolint:contextcheck
			rpcAPI.HandleProcessingError(log, "Unable to perform admission control", err, logz.AgentKey(admitJobErr.runnerController))
		default:
			s.admissionControlTotalCounter.Add(context.Background(), 1, admissionControlFailedMetricOptions...) //nolint:contextcheck
			rpcAPI.HandleProcessingError(log, "Unable to perform admission control", err)
		}

		// We want to update the job even on cancellation.
		updateJobCtx, updateJobCancel := context.WithTimeout(context.WithoutCancel(ctx), updateJobTimeout)
		defer updateJobCancel()

		jobFailErr := gapi.UpdateCIJob(updateJobCtx, s.gitlabClient, jobResponse.JobId, &gapi.UpdateCIJobAPIRequest{
			JobToken:      jobResponse.JobToken,
			State:         jobFailedState,
			FailureReason: jobRunnerSystemFailureReason,
		}, opts...)

		if jobFailErr != nil {
			rpcAPI.HandleProcessingError(log, "Failed to let job fail because it was not admitted", jobFailErr)
			// failing the job is best effort; the runner gets the empty response below either way
		}

		// This should be transparent to the runner. It should just request a new job (to get new correlation ids etc)
		return &rpc.GetJobResponse{}, nil
	}

	return &rpc.GetJobResponse{
		JobResponse: respBodyBytes,
	}, nil
}

// admitJob runs admission control for a job that GitLab handed out.
//
// It asks GitLab which Runner Controllers apply to the runner identified by
// runnerToken (passing opts to the GitLab request) and then asks each of them,
// one after the other and each bounded by admitJobTimeout, to admit the job
// described by jobResponse. The first controller that rejects or fails stops
// the loop.
//
// Returns:
//   - nil when every Runner Controller admitted the job.
//   - errAdmissionControlDisabled when GitLab answers the controller lookup
//     with HTTP 404 or 501.
//   - errNoAdmissionControllers when GitLab returned no Runner Controllers.
//   - *notAdmittedError when a Runner Controller rejected the job.
//   - *admitJobError when a Runner Controller request failed, timed out or
//     returned an unexpected response type.
//   - any other error from the controller lookup, or an error when the job
//     response carries no job info to build the admission request from.
func (s *server) admitJob(ctx context.Context, runnerToken api.AgentToken, jobResponse *rpc.JobResponse, opts []gitlab.DoOption) error {
	// 0. Ask for Runner Controllers that are applicable for this request.
	resp, err := gapi.GetRunnerControllersForJobAdmission(ctx, s.gitlabClient, runnerToken, opts...)
	if err != nil {
		var ce *gitlab.ClientError
		if errors.As(err, &ce) {
			switch ce.StatusCode {
			case http.StatusNotFound:
				// Rails is running a non enterprise instance, we don't have admission control, so we let it pass!
				return errAdmissionControlDisabled
			case http.StatusNotImplemented:
				// Rails is an enterprise instance, but we don't have admission control (probably because of disabled feature flag), so we let it pass!
				return errAdmissionControlDisabled
			}
		}

		return err
	}

	// Without any runner controller there is nothing to ask, the caller admits the job.
	if len(resp.RunnerControllers) == 0 {
		return errNoAdmissionControllers
	}

	// The job info comes from an externally supplied JSON document. Fail the
	// admission instead of panicking on a nil dereference when it is missing.
	// This check is deliberately done only once we know that admission control
	// actually has to run.
	jobInfo := jobResponse.JobInfo
	if jobInfo == nil {
		return errors.New("job response does not contain job info required for admission control")
	}

	// construct runner controller client
	client := s.newAdmissionControllerClient()

	// construct request
	req := &runner_controller_rpc.AdmitJobRequest{
		ProjectId:       jobInfo.ProjectId,
		ProjectName:     jobInfo.ProjectName,
		ProjectFullPath: jobInfo.ProjectFullPath,
		NamespaceId:     jobInfo.NamespaceId,
		RootNamespaceId: jobInfo.RootNamespaceId,
		OrganizationId:  jobInfo.OrganizationId,
		UserId:          jobInfo.UserId,
		Image: &runner_controller_rpc.Image{
			Name: jobResponse.JobImage.GetName(),
		},
	}

	// 1. Reach out to runner controllers sequentially (we can optimize this later)
	for _, rc := range resp.RunnerControllers {
		// The closure scopes the per-controller timeout: its deferred cancel runs
		// at the end of every iteration instead of at function return.
		rcErr := func() error {
			admitJobCtx, admitJobCancel := context.WithTimeout(ctx, admitJobTimeout)
			defer admitJobCancel()

			agentKey := api.AgentKey{ID: rc.Id, Type: api.AgentTypeRunnerController}
			admitResp, admitErr := client.AdmitJob(admitJobCtx, agentKey, req)

			// 2a. Check if job should not be admitted
			if admitErr != nil {
				s.admissionControlRequestsTotalCounter.Add(context.Background(), 1, admissionControlRequestFailedMetricOptions...) //nolint:contextcheck

				if grpctool.RequestTimedOut(admitErr) {
					admitErr = fmt.Errorf("timed out within %.2f seconds during job admission request", admitJobTimeout.Seconds())
				}
				return &admitJobError{runnerController: agentKey, inner: admitErr}
			}

			switch r := admitResp.AdmissionResponse.(type) {
			case *runner_controller_rpc.AdmitJobResponse_Admitted:
				// yay, next ...!
				s.admissionControlRequestsTotalCounter.Add(context.Background(), 1, admissionControlRequestAdmittedMetricOptions...) //nolint:contextcheck
				return nil
			case *runner_controller_rpc.AdmitJobResponse_Rejected:
				s.admissionControlRequestsTotalCounter.Add(context.Background(), 1, admissionControlRequestRejectedMetricOptions...) //nolint:contextcheck
				return &notAdmittedError{runnerController: agentKey, reason: r.Rejected.Reason}
			default: // this should never happen!
				s.admissionControlRequestsTotalCounter.Add(context.Background(), 1, admissionControlRequestFailedMetricOptions...) //nolint:contextcheck
				return &admitJobError{runnerController: agentKey, inner: fmt.Errorf("unexpected runner controller AdmitJob response type: %#v", admitResp.AdmissionResponse)}
			}
		}()

		if rcErr != nil {
			return rcErr
		}
	}

	// 2b. Admit job
	return nil
}
