Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ require (
github.com/elazarl/go-bindata-assetfs v1.0.1
github.com/go-logr/logr v1.4.3
github.com/go-logr/zerologr v1.2.3
github.com/golang/mock v1.6.0
github.com/golang/mock v1.7.0-rc.1
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510
github.com/grpc-ecosystem/go-grpc-middleware v1.4.0
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0
Expand Down Expand Up @@ -123,3 +123,5 @@ require (
)

replace github.com/ray-project/kuberay/ray-operator => ./ray-operator

replace google.golang.org/genproto => google.golang.org/genproto v0.0.0-20250707201910-8d1bb00bc6a7
1,778 changes: 1,771 additions & 7 deletions go.sum

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions go.work
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Workspace definition: minimum Go version plus the modules that are
// developed together in this repository.
go 1.24.0

use (
	.
	./historyserver
	./ray-operator
)
1,787 changes: 1,787 additions & 0 deletions go.work.sum

Large diffs are not rendered by default.

33 changes: 33 additions & 0 deletions historyserver/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# ---------------------------------------------------------------------------
# Stage 1: build the collector and historyserver binaries (and, optionally,
# the dashboard client). The stage always runs on the build host's platform
# and cross-compiles the Go binaries for the requested target platform.
# ---------------------------------------------------------------------------
FROM --platform=$BUILDPLATFORM golang:1.25.1 AS builder

# TARGETOS / TARGETARCH are supplied automatically by BuildKit, but an ARG is
# only visible inside a stage when it is declared inside that stage. Declaring
# them before the first FROM left them empty in the RUN below, so the binaries
# were built for the build platform instead of the target platform.
ARG TARGETOS
ARG TARGETARCH

# Go module proxy. The default keeps the previous hard-coded value; override
# with `--build-arg GOPROXY=...` where that proxy is not appropriate.
ARG GOPROXY=https://goproxy.cn,direct
ENV GOPROXY=${GOPROXY}

# Set to "yes" to install Node.js and build the dashboard client.
ARG BUILD_RAYSERVER_DASHBOARD

# Install nvm + Node 14 only when the dashboard is going to be built.
# `curl -f` makes an HTTP error fail the build instead of saving an error page
# as install.sh.
RUN if [ "$BUILD_RAYSERVER_DASHBOARD" = "yes" ] ; then \
      curl -fsSL -o install.sh https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh && chmod +x install.sh && ./install.sh && /bin/bash -c "source $HOME/.nvm/nvm.sh && nvm install 14 && nvm use 14" ;\
    else \
      echo "$BUILD_RAYSERVER_DASHBOARD not yes, no need install nvm"; \
    fi

WORKDIR /historyserver
COPY . .

# Build the dashboard client, or create an empty build directory so that the
# COPY --from=builder of that path in the final stage still succeeds.
RUN if [ "$BUILD_RAYSERVER_DASHBOARD" = "yes" ] ; then \
      /bin/bash -c "source $HOME/.nvm/nvm.sh && cd dashboard/v2.51.0/client && npm ci && npm run build" ;\
    else \
      mkdir -p dashboard/v2.51.0/client/build ;\
      echo "do not npm run build"; \
    fi

# Cross-compile both binaries for the target platform.
RUN make build GOOS=${TARGETOS} GOARCH=${TARGETARCH}

# ---------------------------------------------------------------------------
# Stage 2: runtime image containing the two binaries and dashboard assets.
# ---------------------------------------------------------------------------
FROM ubuntu:22.04

# Install CA certificates, then drop the apt caches in the same layer. The
# cleanup must come after the install, otherwise the package lists stay in the
# image.
RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends ca-certificates && rm -rf /var/cache/apt/ /var/lib/apt/lists/*

COPY --from=builder /historyserver/output/bin/historyserver /usr/local/bin/historyserver
COPY --from=builder /historyserver/output/bin/collector /usr/local/bin/collector
COPY --from=builder /historyserver/dashboard/v2.51.0/client/build /dashboard/v2.51.0/client/build
COPY --from=builder /historyserver/dashboard/homepage /dashboard/homepage
140 changes: 140 additions & 0 deletions historyserver/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# Lint tooling: the pinned golangci-lint version and the path where
# `go install` places the binary.
GOLANGCILINT_VERSION ?= v1.59.0
GOBIN := $(shell go env GOPATH)/bin
GOBIN_GOLANGCILINT := $(GOBIN)/golangci-lint

# Name of the docker buildx builder instance used by the image targets.
DOCKERBUILDER_INSTANCE=historyserver
# Build output layout: binaries are written to $(BIN_DIR).
OUT_DIR=output
BIN_DIR=$(OUT_DIR)/bin
BINARY_NAME=historyserver
BINARY_NAME_COLLECTOR=collector

# Setting SHELL to bash allows bash commands to be executed by recipes.
# `-o pipefail` makes a pipeline fail when any command in it fails.
SHELL = /usr/bin/env bash -o pipefail

# Build metadata injected into the binaries through -X linker flags below.
# stderr of the git commands is discarded so that building from a tree without
# git metadata (for example a Docker build context that has no .git directory)
# yields empty values instead of "fatal: not a git repository" noise.
BUILD_TIMESTAMP := $(shell date -u +"%Y-%m-%dT%H:%M:%SZ")
COMMIT_SHORT ?= $(shell git rev-parse --short HEAD 2>/dev/null)
BRANCH ?= $(shell git branch --show-current 2>/dev/null)
VERSION ?= $(shell git describe --tags --long 2>/dev/null|awk -F '-' '{print $$1"."$$2"-"$$3""}')

# Import-path prefix of the package holding the Version/CommitID/BuildDate/
# Branch variables.
# NOTE(review): -X is silently ignored when the import path does not match;
# confirm this value against the module path declared in this module's go.mod.
PACKAGE = gitlab.alibaba-inc.com/eml/historyserver

GO_LDFLAGS := -extldflags "-static"
# GO_LDFLAGS += -w -s # Drop debugging symbols.
GO_LDFLAGS += -X $(PACKAGE)/pkg.Version=$(VERSION) \
	-X $(PACKAGE)/pkg.CommitID=$(COMMIT_SHORT) \
	-X $(PACKAGE)/pkg.BuildDate=$(BUILD_TIMESTAMP) \
	-X $(PACKAGE)/pkg.Branch=$(BRANCH)
GO_BUILD_FLAGS := -ldflags '$(GO_LDFLAGS)'

# Target platform. Defaults to the host platform reported by the Go toolchain
# so that a plain `make build` produces binaries runnable on the machine that
# built them; override on the command line to cross-compile, e.g.
# `make build GOOS=linux GOARCH=arm64`.
GOOS ?= $(shell go env GOOS)
GOARCH ?= $(shell go env GOARCH)

# Common prefix of every `go build` invocation below: cgo disabled, target
# platform taken from GOOS/GOARCH, verbose output, shared build flags.
go_build = CGO_ENABLED=0 GOOS=$(GOOS) GOARCH=$(GOARCH) go build -v $(GO_BUILD_FLAGS)

.PHONY: all clean build buildcollector buildhistoryserver simplebuild

# Default goal: build both binaries.
all: build

# Remove the whole output directory.
clean:
	rm -rf $(OUT_DIR)

# Aggregate target for the collector and the history server.
build: buildcollector buildhistoryserver

# Build the collector binary; `mod` runs first.
buildcollector: mod
	@echo ""
	@echo "go build ..."
	$(go_build) -o $(BIN_DIR)/$(BINARY_NAME_COLLECTOR) ./cmd/collector/main.go

# Build the history server binary; `mod` runs first.
buildhistoryserver: mod
	@echo ""
	@echo "go build ..."
	$(go_build) -o $(BIN_DIR)/$(BINARY_NAME) ./cmd/historyserver/main.go

# Build without the `mod` step and without naming a package, so `go build`
# compiles whatever package is in the current directory.
simplebuild:
	@echo ""
	@echo "go build ..."
	$(go_build) -o $(BIN_DIR)/$(BINARY_NAME)

.PHONY: mod dockerbuilder_instance localimage

# Run `go mod tidy` for this module.
mod:
	go mod tidy

# Make $(DOCKERBUILDER_INSTANCE) the active buildx builder. If selecting it
# fails, the builder is created and then selected by the second line.
dockerbuilder_instance:
	@docker buildx use $(DOCKERBUILDER_INSTANCE) || docker buildx create --name $(DOCKERBUILDER_INSTANCE)
	docker buildx use $(DOCKERBUILDER_INSTANCE)

# Build a linux/amd64 image from the current directory and load it into the
# local Docker image store.
# NOTE(review): the tag is spelled "laster"; presumably "latest" was intended.
# Confirm before renaming, since documentation or manifests may rely on it.
localimage: dockerbuilder_instance
	docker buildx build -t historyserver:laster --platform linux/amd64 . --load

.PHONY: test alllint issuelint todolist

# Run the Go tests under pkg/ and cmd/ in verbose mode.
test:
	go test -v ./pkg/... ./cmd/...

# Run both lint passes: the TODO listing first, then the issue check.
alllint: todolist issuelint ## Run go lint against code.

# Lint with .golangci.yaml.
issuelint: install-golint
	@printf '\n%s\n' "-------------------- show issues info, if has issuse, return error --------------------"
	$(GOBIN_GOLANGCILINT) run -v --print-resources-usage -c .golangci.yaml

# Run only the godox linter, configured by .golangci.info.yaml.
todolist: install-golint ## Run go lint against code.
	@printf '\n%s\n' "-------------------- only show TODO list info --------------------"
	$(GOBIN_GOLANGCILINT) run --print-resources-usage -c .golangci.info.yaml --enable-only godox
	@printf '\n'

# Ensure $(GOBIN_GOLANGCILINT) exists at version $(GOLANGCILINT_VERSION),
# installing it with `go install` when it is missing or reports a different
# version.
#
# The check runs inside the recipe rather than in parse-time ifeq/$(shell ...)
# conditionals, so golangci-lint is probed only when this target is actually
# requested, not on every `make` invocation. The install commands are also
# written once instead of once per branch.
.PHONY: install-golint
install-golint: ## check golint if not exist install golint tools
	@{ \
	set -e ;\
	wanted='$(GOLANGCILINT_VERSION)' ;\
	if [ ! -x '$(GOBIN_GOLANGCILINT)' ]; then \
		echo "golangci-lint not exist" ;\
	else \
		installed="$$('$(GOBIN_GOLANGCILINT)' version --format short 2>/dev/null || true)" ;\
		: "compare without a leading v, since the reported version may or may not carry one" ;\
		if [ "$${installed#v}" = "$${wanted#v}" ]; then \
			echo "golangci-lint version match" ;\
			exit 0 ;\
		fi ;\
		echo "golangci-lint version do not match" ;\
	fi ;\
	echo 'installing golangci-lint-$(GOLANGCILINT_VERSION)' ;\
	go install github.com/golangci/golangci-lint/cmd/golangci-lint@$(GOLANGCILINT_VERSION) ;\
	echo 'Successfully installed' ;\
	}



# Generate manifests e.g. CRD, RBAC etc.
#manifests: controller-gen
# $(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=manager-role webhook paths="./..." output:crd:artifacts:config=config/crd/bases

# Generate code
#generate: controller-gen
# $(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=./api/...


# find or download controller-gen
# download controller-gen if necessary
#controller-gen:
#ifeq (, $(shell which controller-gen))
# go get sigs.k8s.io/controller-tools/cmd/[email protected]
#CONTROLLER_GEN=$(shell go env GOPATH)/bin/controller-gen
#else
#CONTROLLER_GEN=$(shell which controller-gen)
#endif
136 changes: 135 additions & 1 deletion historyserver/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,138 @@
This project is under active development.
See [#ray-history-server](https://app.slack.com/client/TN4768NRM/C09QLLU8HTL) channel to provide feedback.

<!-- TODO: Add docs for local development -->
Ray History Server is a service for collecting, storing, and viewing historical logs and metadata from Ray clusters. It provides a web interface to explore the history of Ray jobs, tasks, actors, and other cluster activities.

## Components

The History Server consists of two main components:

1. **Collector**: Runs as a sidecar container in Ray clusters to collect logs and metadata
2. **History Server**: Central service that aggregates data from collectors and provides a web UI

## Building

### Prerequisites

- Go 1.24 or higher (the repository's `go.work` declares `go 1.24.0`)
- Docker (for building container images)
- Make

### Building Binaries

To build the binaries locally:

```bash
make build
```

This will generate two binaries in the `output/bin/` directory:
- `collector`: The collector service that runs alongside Ray nodes
- `historyserver`: The main history server service

You can also build individual components:
```bash
make buildcollector # Build only the collector
make buildhistoryserver # Build only the history server
```

### Building Docker Images

To build a Docker image:

```bash
make localimage
```

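This builds a `linux/amd64` Docker image tagged `historyserver:laster` (the tag is spelled this way in the Makefile's `localimage` target) and loads it into the local Docker image store. The image contains both binaries and the dashboard assets.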

For multi-platform builds, you can use:
```bash
docker buildx build -t <image-name>:<tag> --platform linux/amd64,linux/arm64 . --push
```

## Configuration

### History Server Configuration

The history server can be configured using command-line flags:

- `--runtime-class-name`: Storage backend type (e.g., "s3", "aliyunoss", "localtest")
- `--ray-root-dir`: Root directory for Ray logs
- `--kubeconfigs`: Path to kubeconfig file(s) for accessing Kubernetes clusters
- `--dashboard-dir`: Directory containing dashboard assets (default: "/dashboard")
- `--runtime-class-config-path`: Path to runtime class configuration file

### Collector Configuration

The collector can be configured using command-line flags:

- `--role`: Node role ("Head" or "Worker")
- `--runtime-class-name`: Storage backend type (e.g., "s3", "aliyunoss")
- `--ray-cluster-name`: Name of the Ray cluster
- `--ray-cluster-id`: ID of the Ray cluster
- `--ray-root-dir`: Root directory for Ray logs
- `--log-batching`: Number of log entries to batch before writing
- `--events-port`: Port for the events server
- `--push-interval`: Interval between pushes to storage
- `--runtime-class-config-path`: Path to runtime class configuration file

## Supported Storage Backends

History Server supports multiple storage backends:

1. **S3/MinIO**: For AWS S3 or MinIO compatible storage
2. **Aliyun OSS**: For Alibaba Cloud Object Storage Service
3. **Local Test**: For local testing and development

Each backend requires specific configuration parameters passed through environment variables or configuration files.

## Running

### Running the History Server

```bash
./output/bin/historyserver \
--runtime-class-name=s3 \
--ray-root-dir=/path/to/logs
```

### Running the Collector

```bash
./output/bin/collector \
--role=Head \
--runtime-class-name=s3 \
--ray-cluster-name=my-cluster \
--ray-root-dir=/path/to/logs
```

## Development

### Code Structure

- `cmd/`: Main applications (collector and historyserver)
- `backend/`: Core logic for storage backends and collection
- `backend/collector/`: Collector-specific code
- `backend/historyserver/`: History server implementation
- `dashboard/`: Web UI files

### Testing

To run tests:

```bash
make test
```

### Linting

To run lint checks:

```bash
make alllint
```

## Deployment

History Server can be deployed in Kubernetes using the manifests in the `config/samples/` directory. Examples are provided for different storage backends including MinIO and Aliyun OSS.
Loading
Loading