|
5 | 5 | [clojars.db :as db] |
6 | 6 | [clojars.maven :as maven] |
7 | 7 | [clojars.stats :as stats] |
8 | | - [clojure.set :as set] |
9 | 8 | [clojure.string :as str] |
10 | 9 | [com.stuartsierra.component :as component]) |
11 | 10 | (:import |
|
55 | 54 | (set! *warn-on-reflection* true) |
56 | 55 |
|
57 | 56 | (defprotocol Search |
58 | | - (index! [t pom]) |
| 57 | + (index! [t version-details]) |
59 | 58 | (search [t query page]) |
60 | 59 | (delete! |
61 | 60 | [t group-id] |
62 | 61 | [t group-id artifact-id])) |
63 | 62 |
|
64 | | -(def ^:private renames |
65 | | - {:name :artifact-id |
66 | | - :jar_name :artifact-id |
67 | | - :group :group-id |
68 | | - :group_name :group-id |
69 | | - :created :at |
70 | | - :homepage :url}) |
71 | | - |
72 | | -(defn- doc-id ^String [group-id artifact-id] |
| 63 | +(defn- doc-id ^String |
| 64 | + [group-id artifact-id] |
73 | 65 | (str group-id ":" artifact-id)) |
74 | 66 |
|
75 | | -(defn- jar->id ^String [{:keys [artifact-id group-id]}] |
76 | | - (doc-id group-id artifact-id)) |
| 67 | +(defn- jar->id ^String |
| 68 | + [{:keys [group_name jar_name]}] |
| 69 | + (doc-id group_name jar_name)) |
77 | 70 |
|
78 | | -(defn delete-from-index [^IndexWriter index-writer ^String group-id & [artifact-id]] |
| 71 | +(defn delete-from-index |
| 72 | + [^IndexWriter index-writer ^String group-id & [artifact-id]] |
79 | 73 | (let [term (if artifact-id |
80 | 74 | (Term. "id" (doc-id group-id artifact-id)) |
81 | 75 | (Term. "group-id" group-id))] |
|
152 | 146 | (str/replace v #"\.(\s|$)" " ")))) |
153 | 147 |
|
154 | 148 | (def ^:private content-items |
155 | | - [:artifact-id |
156 | | - (hyphen-remover :artifact-id) |
157 | | - :group-id |
158 | | - (hyphen-remover :group-id) |
| 149 | + [:jar_name |
| 150 | + (hyphen-remover :jar_name) |
| 151 | + :group_name |
| 152 | + (hyphen-remover :group_name) |
159 | 153 | ;; Include 'group name' & 'group name/artifact-name' in content (for a |
160 | | - ;; group-id of group.name) to aid in searching for things where new projects |
| 154 | + ;; group_name of group.name) to aid in searching for things where new projects |
161 | 155 | ;; had to be deployed under a domain-based group |
162 | | - (period-remover :group-id) |
163 | | - (period-remover #(->> % ((juxt :group-id :artifact-id)) (str/join "/"))) |
| 156 | + (period-remover :group_name) |
| 157 | + (period-remover #(->> % ((juxt :group_name :jar_name)) (str/join "/"))) |
164 | 158 | ;; Include 'group-name/artifact-name' in content to allow |
165 | 159 | ;; the "group-name/artifact-name" phrase to find it |
166 | | - #(->> % ((juxt :group-id :artifact-id)) (str/join "/")) |
| 160 | + #(->> % ((juxt :group_name :jar_name)) (str/join "/")) |
167 | 161 | (sentence-period-remover :description) |
168 | | - :url |
| 162 | + :homepage |
169 | 163 | :version |
170 | | - #(->> % :authors (str/join " "))]) |
| 164 | + #(when-some [authors (:authors %)] |
| 165 | + (str/replace authors #"," " "))]) |
171 | 166 |
|
172 | 167 | (def ^:private ^String content-field-name "_content") |
173 | 168 | (def ^:private ^String boost-field-name "_download_boost") |
|
182 | 177 | [^String name ^String value] |
183 | 178 | (TextField. name value Field$Store/YES)) |
184 | 179 |
|
185 | | -(defn jar->doc |
186 | | - ^Iterable [{:keys [at |
187 | | - artifact-id |
188 | | - group-id |
189 | | - description |
190 | | - licenses |
191 | | - url |
192 | | - version] |
193 | | - :or {at (Date.)} |
194 | | - :as jar} |
195 | | - download-boost] |
| 180 | +(defn- jar->doc ^Iterable |
| 181 | + [{:keys [created description group_name homepage jar_name licenses version] :as jar} |
| 182 | + download-boost] |
196 | 183 | (doto (Document.) |
197 | 184 | ;; id: We need a unique identifier for each doc so that we can use updateDocument |
198 | 185 | (.add (string-field "id" (jar->id jar))) |
199 | | - (.add (string-field "artifact-id" artifact-id)) |
200 | | - (.add (string-field "group-id" group-id)) |
| 186 | + (.add (string-field "artifact-id" jar_name)) |
| 187 | + (.add (string-field "group-id" group_name)) |
201 | 188 | (cond-> description |
202 | 189 | (.add (text-field "description" description))) |
203 | | - (.add (string-field "at" (str (.getTime ^Date at)))) |
| 190 | + (.add (string-field "at" (str (.getTime ^Date created)))) |
204 | 191 | (.add (text-field "licenses" (str/join " " (map :name licenses)))) |
205 | | - (cond-> url |
206 | | - (.add (string-field "url" url))) |
| 192 | + (cond-> homepage |
| 193 | + (.add (string-field "url" homepage))) |
207 | 194 | ;; version isn't really useful to search, since we only store the |
208 | 195 | ;; most-recently-seen value, but we have it here because we've had it |
209 | 196 | ;; historically |
|
226 | 213 | (def download-score-weight 50) |
227 | 214 |
|
228 | 215 | (defn- calculate-document-boost |
229 | | - [stats {:as _jar :keys [artifact-id group-id]}] |
| 216 | + [stats {:as _jar :keys [group_name jar_name]}] |
230 | 217 | (let [total (stats/total-downloads stats)] |
231 | 218 | (* download-score-weight |
232 | | - (/ (or (stats/download-count stats group-id artifact-id) 0) |
| 219 | + (/ (or (stats/download-count stats group_name jar_name) 0) |
233 | 220 | (max 1 total))))) |
234 | 221 |
|
235 | 222 | (defn disk-index |
|
239 | 226 |
|
240 | 227 | (defn- index-jar |
241 | 228 | [^IndexWriter index-writer stats jar] |
242 | | - (let [jar' (set/rename-keys jar renames) |
243 | | - doc (jar->doc jar' (calculate-document-boost stats jar'))] |
| 229 | + (let [doc (jar->doc jar (calculate-document-boost stats jar))] |
244 | 230 | ;; always delete and replace the doc, since we are indexing every version |
245 | 231 | ;; and the last one wins |
246 | | - (.updateDocument index-writer (Term. "id" (jar->id jar')) doc))) |
| 232 | + (.updateDocument index-writer (Term. "id" (jar->id jar)) doc))) |
247 | 233 |
|
248 | 234 | (defn- track-index-status |
249 | 235 | [{:keys [indexed last-time] :as status}] |
|
384 | 370 |
|
385 | 371 | (defrecord LuceneSearch [stats index-factory ^Directory index] |
386 | 372 | Search |
387 | | - (index! [_t pom] |
| 373 | + (index! [_t version-details] |
388 | 374 | (with-open [index-writer (index-writer index false)] |
389 | | - (index-jar index-writer stats pom))) |
| 375 | + (index-jar index-writer stats version-details))) |
390 | 376 | (search [_t query page] |
391 | 377 | (-search index query page)) |
392 | 378 | (delete! [_t group-id] |
|
0 commit comments