Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
finalize
  • Loading branch information
CodingWithTim committed Jan 20, 2024
commit 4c66f701f1fb8acb8b47f8d8e67f375441ac6c1c
7 changes: 4 additions & 3 deletions fastchat/serve/monitor/topic_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,15 +244,16 @@ def get_cluster_info(texts, labels, topk_indices):
texts = read_texts(
args.input_file, args.min_length, args.max_length, args.english_only
)
print(f"#text: {len(texts)}")

if args.embeddings_file is None:
print(f"#text: {len(texts)}")
embeddings = get_embeddings(texts, args.model, args.batch_size)
print(f"embeddings shape: {embeddings.shape}")
if args.save_embeddings:
# allow saving embedding to save time
# allow saving embedding to save time and money
torch.save(embeddings, "embeddings.pt")
else:
embeddings = torch.load(args.embeddings_file)
print(f"embeddings shape: {embeddings.shape}")

if args.cluster_alg == "kmeans":
centers, labels = run_k_means(embeddings, num_clusters)
Expand Down