Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions app/jobs/cache_attribute_word_count_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Background job that recomputes the cached word count of a single Attribute
# and stores it in that attribute's `word_count_cache` column.
class CacheAttributeWordCountJob < ApplicationJob
  queue_as :cache

  # @param args [Array] the first element is the id of the Attribute to recount
  # @return [void]
  def perform(*args)
    attribute_id = args.shift
    attribute = Attribute.find_by(id: attribute_id)

    # The attribute may no longer exist by the time the job runs.
    return if attribute.nil?

    # `blank?` already covers nil, so no separate nil check is needed.
    if attribute.value.blank?
      # A value that has been cleared must not keep its previous count;
      # otherwise anything summing `word_count_cache` keeps counting text that
      # no longer exists. Attributes that were never counted (nil or zero
      # cache) are left untouched, exactly as before.
      attribute.update!(word_count_cache: 0) unless attribute.word_count_cache.to_i.zero?
      return
    end

    word_count = WordCountAnalyzer::Counter.new(
      ellipsis: 'no_special_treatment',
      hyperlink: 'count_as_one',
      contraction: 'count_as_one',
      hyphenated_word: 'count_as_one',
      date: 'no_special_treatment',
      number: 'count',
      numbered_list: 'ignore',
      xhtml: 'remove',
      forward_slash: 'count_as_multiple_except_dates',
      backslash: 'count_as_one',
      dotted_line: 'ignore',
      dashed_line: 'ignore',
      underscore: 'ignore',
      stray_punctuation: 'ignore'
    ).count(attribute.value)

    attribute.update!(word_count_cache: word_count)
  end
end
19 changes: 19 additions & 0 deletions app/jobs/cache_sum_attribute_word_count_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Background job that totals the cached word counts of every Attribute
# belonging to one entity and records that total as the entity's
# WordCountUpdate for the current date.
class CacheSumAttributeWordCountJob < ApplicationJob
  queue_as :cache

  # @param args [Array] the entity's class name (String) followed by its id
  # @return [void]
  def perform(*args)
    entity_type = args.shift
    entity_id = args.shift

    entity = entity_type.constantize.find_by(id: entity_id)
    # `find_by` returns nil when the record no longer exists (for example it
    # was deleted after the job was enqueued); there is nothing to record then.
    return if entity.nil?

    sum_attribute_word_count = Attribute
      .where(entity_type: entity_type, entity_id: entity_id)
      .sum(:word_count_cache)

    # `for_date` is a date column, so look the row up by date rather than by
    # a timestamp: one WordCountUpdate per entity per day is reused.
    update = entity.word_count_updates.find_or_initialize_by(for_date: Date.current)
    update.word_count = sum_attribute_word_count
    # Only a freshly initialized row lacks a user; keep the existing one otherwise.
    update.user_id ||= entity.user_id

    update.save!
  end
end
12 changes: 10 additions & 2 deletions app/jobs/save_document_revision_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,21 @@ class SaveDocumentRevisionJob < ApplicationJob
def perform(*args)
document_id = args.shift

document = Document.find(document_id)
return unless document.present?
document = Document.find_by(id: document_id)
return unless document

# Update cached word count for the document regardless of how often this is called
new_word_count = document.computed_word_count
document.update(cached_word_count: new_word_count)

# Save a WordCountUpdate for this document for today
update = document.word_count_updates.find_or_initialize_by(
for_date: DateTime.current,
)
update.word_count = new_word_count
update.user_id ||= document.user_id
update.save!

# Make sure we're only storing revisions at least every 5 min
latest_revision = document.document_revisions.order('created_at DESC').limit(1).first
if latest_revision.present? && latest_revision.created_at > 5.minutes.ago
Expand Down
5 changes: 5 additions & 0 deletions app/models/concerns/is_content_page.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ module IsContentPage
has_many :timeline_events, through: :timeline_event_entities
has_many :timelines, -> { distinct }, through: :timeline_events

has_many :word_count_updates, as: :entity, dependent: :destroy
# Word count stored on the WordCountUpdate with the latest `for_date`,
# or 0 when there is no such update (or its word count is nil).
def latest_word_count_cache
  newest_update = word_count_updates.order('for_date DESC').limit(1).first
  return 0 if newest_update.nil?

  newest_update.word_count || 0
end

scope :unarchived, -> { where(archived_at: nil) }
def archive!
update!(archived_at: DateTime.now)
Expand Down
6 changes: 6 additions & 0 deletions app/models/documents/document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ class Document < ApplicationRecord

attr_accessor :tagged_text

# Duplicated from is_content_page since we don't include that here yet
has_many :word_count_updates, as: :entity, dependent: :destroy
# Returns the word count of this record's WordCountUpdate with the latest
# `for_date`; falls back to 0 when none exists or its count is nil.
def latest_word_count_cache
  most_recent = word_count_updates
    .order('for_date DESC')
    .limit(1)
    .first

  most_recent&.word_count || 0
end

KEYS_TO_TRIGGER_REVISION_ON_CHANGE = %w(title body synopsis notes_text)

def self.color
Expand Down
10 changes: 10 additions & 0 deletions app/models/page_data/attribute.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@ class Attribute < ApplicationRecord
end
end

# After a commit in which `value` was saved with a change, refresh the word
# count caches in the background.
after_commit do
  next unless saved_changes.key?('value')

  # Recount the words stored on this attribute
  CacheAttributeWordCountJob.perform_later(id)

  # Re-total the word count of the page this attribute belongs to
  CacheSumAttributeWordCountJob.perform_later(entity_type, entity_id)
end

after_save do
entity.touch
end
Expand Down
4 changes: 4 additions & 0 deletions app/models/word_count_update.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# A word count recorded for one entity, attributed to a user.
# The owning entity is polymorphic, so any model declaring
# `has_many :word_count_updates, as: :entity` can own these rows.
class WordCountUpdate < ApplicationRecord
# The user the word count is attributed to
belongs_to :user
# The record whose word count this row stores (entity_type + entity_id)
belongs_to :entity, polymorphic: true
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Adds an integer `word_count_cache` column to the `attributes` table.
# No default or NOT NULL constraint is given, so existing rows start as NULL.
class AddWordCountCacheToAttributes < ActiveRecord::Migration[6.0]
def change
add_column :attributes, :word_count_cache, :integer
end
end
12 changes: 12 additions & 0 deletions db/migrate/20211007234707_create_word_count_updates.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Creates the `word_count_updates` table: each row stores a word count for a
# polymorphic entity on a given date, attributed to a user.
class CreateWordCountUpdates < ActiveRecord::Migration[6.0]
def change
create_table :word_count_updates do |t|
# Required reference to the users table, enforced with a foreign key
t.references :user, null: false, foreign_key: true
# Required polymorphic owner (entity_type + entity_id columns)
t.references :entity, polymorphic: true, null: false
t.integer :word_count
# NOTE(review): rows are looked up per entity by for_date elsewhere in this
# change, but nothing here enforces one row per entity per date — consider
# whether a unique index on (entity_type, entity_id, for_date) is wanted.
t.date :for_date

t.timestamps
end
end
end
16 changes: 15 additions & 1 deletion db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema.define(version: 2021_09_15_030031) do
ActiveRecord::Schema.define(version: 2021_10_07_234707) do

create_table "active_storage_attachments", force: :cascade do |t|
t.string "name", null: false
Expand Down Expand Up @@ -165,6 +165,7 @@
t.datetime "created_at"
t.datetime "updated_at"
t.datetime "deleted_at"
t.integer "word_count_cache"
t.index ["attribute_field_id", "deleted_at", "entity_id", "entity_type"], name: "attributes_afi_deleted_at_entity_id_entity_type"
t.index ["attribute_field_id", "deleted_at"], name: "index_attributes_on_attribute_field_id_and_deleted_at"
t.index ["attribute_field_id", "user_id", "entity_type", "entity_id", "deleted_at"], name: "attributes_afi_ui_et_ei_da"
Expand Down Expand Up @@ -3636,6 +3637,18 @@
t.integer "habitat_id"
end

create_table "word_count_updates", force: :cascade do |t|
t.integer "user_id", null: false
t.string "entity_type", null: false
t.integer "entity_id", null: false
t.integer "word_count"
t.date "for_date"
t.datetime "created_at", precision: 6, null: false
t.datetime "updated_at", precision: 6, null: false
t.index ["entity_type", "entity_id"], name: "index_word_count_updates_on_entity_type_and_entity_id"
t.index ["user_id"], name: "index_word_count_updates_on_user_id"
end

add_foreign_key "active_storage_attachments", "active_storage_blobs", column: "blob_id"
add_foreign_key "api_keys", "users"
add_foreign_key "api_requests", "application_integrations"
Expand Down Expand Up @@ -4047,4 +4060,5 @@
add_foreign_key "vehicles", "users"
add_foreign_key "votes", "users"
add_foreign_key "votes", "votables"
add_foreign_key "word_count_updates", "users"
end
24 changes: 24 additions & 0 deletions lib/tasks/backfill.rake
Original file line number Diff line number Diff line change
@@ -1,4 +1,28 @@
namespace :backfill do
desc "Backfill cached word counts on all attributes"
task attribute_word_count_caches: :environment do
  # Counting rules applied to every attribute handled by this task.
  counter_options = {
    ellipsis: 'no_special_treatment',
    hyperlink: 'count_as_one',
    contraction: 'count_as_one',
    hyphenated_word: 'count_as_one',
    date: 'no_special_treatment',
    number: 'count',
    numbered_list: 'ignore',
    xhtml: 'remove',
    forward_slash: 'count_as_multiple_except_dates',
    backslash: 'count_as_one',
    dotted_line: 'ignore',
    dashed_line: 'ignore',
    underscore: 'ignore',
    stray_punctuation: 'ignore'
  }

  # Only attributes that have no cached count yet and whose value is not one
  # of the listed empty/trivial values.
  uncached_attributes = Attribute
    .where(word_count_cache: nil)
    .where.not(value: ["", " ", ".", nil])

  uncached_attributes.find_each do |record|
    total_words = WordCountAnalyzer::Counter.new(**counter_options).count(record.value)

    record.update_column(:word_count_cache, total_words)
  end
end

desc "Backfill cached word counts on all documents"
task document_word_count_caches: :environment do
Document.where(cached_word_count: nil).where.not(body: [nil, ""]).find_each(batch_size: 500) do |document|
Expand Down
15 changes: 15 additions & 0 deletions test/fixtures/word_count_updates.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Read about fixtures at https://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html

# Two sample WordCountUpdate rows, each tied to the user and entity fixture
# with the same label.
one:
user: one
entity: one
# NOTE(review): "Entity" looks like a generator placeholder — no Entity model
# is visible in this change. Confirm, and if so point this at a real model,
# e.g. `entity: one (Document)`.
entity_type: Entity
word_count: 1
for_date: 2021-10-07

two:
user: two
entity: two
# NOTE(review): same placeholder entity_type as above — confirm.
entity_type: Entity
word_count: 1
for_date: 2021-10-07
7 changes: 7 additions & 0 deletions test/models/word_count_update_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
require 'test_helper'

# Model tests for WordCountUpdate. No test cases are defined yet; the
# commented-out example below is only a placeholder.
class WordCountUpdateTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
# end
end