Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
add cached word count to attributes, backfill task
  • Loading branch information
drusepth committed Oct 7, 2021
commit c312d439994bae70f605d3c9af1f67d40a90a195
30 changes: 30 additions & 0 deletions app/jobs/cache_attribute_word_count_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Recomputes the word count of a single Attribute's value and stores it in
# the attribute's `word_count_cache` column.
class CacheAttributeWordCountJob < ApplicationJob
  queue_as :cache

  # Options handed to WordCountAnalyzer::Counter on every run. Frozen so the
  # same hash can be shared safely between job executions.
  COUNTER_OPTIONS = {
    ellipsis: 'no_special_treatment',
    hyperlink: 'count_as_one',
    contraction: 'count_as_one',
    hyphenated_word: 'count_as_one',
    date: 'no_special_treatment',
    number: 'count',
    numbered_list: 'ignore',
    xhtml: 'remove',
    forward_slash: 'count_as_multiple_except_dates',
    backslash: 'count_as_one',
    dotted_line: 'ignore',
    dashed_line: 'ignore',
    underscore: 'ignore',
    stray_punctuation: 'ignore'
  }.freeze

  # @param args [Array] the first element is the id of the Attribute to recount;
  #   any further elements are ignored
  # @raise [ActiveRecord::RecordInvalid] if persisting the new count fails validation
  def perform(*args)
    attribute = Attribute.find_by(id: args.first)

    # find_by returns nil when no row matches (e.g. the record no longer exists).
    return if attribute.nil?

    if attribute.value.blank?
      # A value that has been cleared must not keep reporting its old count.
      # Attributes that never had a count cached are left untouched.
      attribute.update!(word_count_cache: 0) if attribute.word_count_cache.to_i.positive?
      return
    end

    word_count = WordCountAnalyzer::Counter.new(**COUNTER_OPTIONS).count(attribute.value)
    attribute.update!(word_count_cache: word_count)
  end
end
4 changes: 4 additions & 0 deletions app/models/page_data/attribute.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ class Attribute < ApplicationRecord
end
end

# Once the transaction has committed, queue a word-count refresh, but only
# when the save that just happened actually changed `value`.
after_commit do
  value_was_saved = saved_changes.key?('value')
  CacheAttributeWordCountJob.perform_later(id) if value_was_saved
end

# Touch the associated entity every time this attribute is saved.
after_save { entity.touch }
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Adds an integer `word_count_cache` column to the `attributes` table.
class AddWordCountCacheToAttributes < ActiveRecord::Migration[6.0]
  # Reversible: rolling back drops the column again.
  def change
    add_column(:attributes, :word_count_cache, :integer)
  end
end
3 changes: 2 additions & 1 deletion db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema.define(version: 2021_09_15_030031) do
ActiveRecord::Schema.define(version: 2021_10_07_215520) do

create_table "active_storage_attachments", force: :cascade do |t|
t.string "name", null: false
Expand Down Expand Up @@ -165,6 +165,7 @@
t.datetime "created_at"
t.datetime "updated_at"
t.datetime "deleted_at"
t.integer "word_count_cache"
t.index ["attribute_field_id", "deleted_at", "entity_id", "entity_type"], name: "attributes_afi_deleted_at_entity_id_entity_type"
t.index ["attribute_field_id", "deleted_at"], name: "index_attributes_on_attribute_field_id_and_deleted_at"
t.index ["attribute_field_id", "user_id", "entity_type", "entity_id", "deleted_at"], name: "attributes_afi_ui_et_ei_da"
Expand Down
24 changes: 24 additions & 0 deletions lib/tasks/backfill.rake
Original file line number Diff line number Diff line change
@@ -1,4 +1,28 @@
namespace :backfill do
desc "Backfill cached word counts on all attributes"
task attribute_word_count_caches: :environment do
  # Built once; a fresh counter is still created per record.
  counter_options = {
    ellipsis: 'no_special_treatment',
    hyperlink: 'count_as_one',
    contraction: 'count_as_one',
    hyphenated_word: 'count_as_one',
    date: 'no_special_treatment',
    number: 'count',
    numbered_list: 'ignore',
    xhtml: 'remove',
    forward_slash: 'count_as_multiple_except_dates',
    backslash: 'count_as_one',
    dotted_line: 'ignore',
    dashed_line: 'ignore',
    underscore: 'ignore',
    stray_punctuation: 'ignore'
  }.freeze

  # The column on attributes is `word_count_cache` (`cached_word_count` is the
  # Document column). Rows with no text are skipped, as in the document task.
  uncached = Attribute.where(word_count_cache: nil).where.not(value: [nil, ""])

  uncached.find_each do |attribute|
    word_count = WordCountAnalyzer::Counter.new(**counter_options).count(attribute.value)

    # update_column writes straight to the database, skipping validations and callbacks.
    attribute.update_column(:word_count_cache, word_count)
  end
end

desc "Backfill cached word counts on all documents"
task document_word_count_caches: :environment do
Document.where(cached_word_count: nil).where.not(body: [nil, ""]).find_each(batch_size: 500) do |document|
Expand Down