Skip to content

Commit 4dc6451

Browse files
authored
Merge pull request #892 from clojars/revert-891-489-sitemaps
Revert "Generate sitemaps with other slow changing data"
2 parents c6ad201 + 135badf commit 4dc6451

File tree

5 files changed: 9 additions (+9) and 152 deletions (-152).

deps.edn

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,6 @@
6262
org.bouncycastle/bcpkix-jdk18on {:mvn/version "1.78"}
6363
org.bouncycastle/bcprov-jdk18on {:mvn/version "1.78"}
6464
org.clojure/clojure {:mvn/version "1.12.0"}
65-
org.clojure/data.xml {:mvn/version "0.2.0-alpha9"}
6665
org.clojure/tools.logging {:mvn/version "1.2.4"}
6766
org.clojure/tools.nrepl {:mvn/version "0.2.11"}
6867
org.postgresql/postgresql {:mvn/version "42.7.2"}

src/clojars/db.clj

Lines changed: 0 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -600,12 +600,6 @@
600600
:from :jars
601601
:order-by :id}))))
602602

603-
604-
(defn all-groups [db]
605-
(q db {:select-distinct [:group_name]
606-
:from :jars
607-
:order-by [[:group_name :asc]]}))
608-
609603
(defn find-dependencies
610604
[db groupname jarname version]
611605
(q db
@@ -635,13 +629,6 @@
635629
:limit limit-num
636630
:offset offset-num}))
637631

638-
(defn all-users
639-
[db]
640-
(q db
641-
{:select [:user]
642-
:from :users
643-
:order-by [[:user :asc]]}))
644-
645632
(defn count-all-projects
646633
[db]
647634
(->

src/clojars/tools/generate_feeds.clj

Lines changed: 7 additions & 110 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,6 @@
77
[clojars.maven :as maven]
88
[clojars.s3 :as s3]
99
[clojars.util :as util]
10-
[clojure.data.xml :as xml]
1110
[clojure.java.io :as io]
1211
[clojure.set :as set])
1312
(:import
@@ -30,14 +29,11 @@
3029
:scm-tag (:tag scm)))
3130
distinct-jars)}))
3231

33-
(defn- all-grouped-jars [db]
34-
(->> (db/all-jars db)
35-
(maven/sort-by-version)
36-
(reverse) ;; We want the most recent version first
37-
(group-by (juxt :group_name :jar_name))))
38-
3932
(defn full-feed [db]
40-
(let [grouped-jars (all-grouped-jars db)]
33+
(let [grouped-jars (->> (db/all-jars db)
34+
(maven/sort-by-version)
35+
(reverse) ;; We want the most recent version first
36+
(group-by (juxt :group_name :jar_name)))]
4137
(->> (for [[[group-id artifact-id] jars] grouped-jars]
4238
(try
4339
(let [base-jar (first jars)]
@@ -88,106 +84,12 @@
8884
[(fu/create-checksum-file f :md5)
8985
(fu/create-checksum-file f :sha1)])
9086

91-
(def sitemap-ns "http://www.sitemaps.org/schemas/sitemap/0.9")
92-
(xml/alias-uri 'sitemap sitemap-ns)
93-
94-
(defn links-list [base-url db]
95-
(let [ ;; get current domain
96-
97-
base-links [ ;; index, projects, security, dmca
98-
""
99-
"/projects"
100-
"/security"
101-
"/dmca"]
102-
group-links (into []
103-
(comp
104-
(map :group_name)
105-
(map #(str "/groups/" %)))
106-
(db/all-groups db))
107-
108-
artifact-links (into []
109-
(mapcat
110-
(fn [[[group-id artifact-id] jars]]
111-
(let [artifact-links
112-
(concat
113-
[(str "/" artifact-id)
114-
(str "/" artifact-id "/dependents")
115-
(str "/" artifact-id "/versions")]
116-
(into []
117-
(map
118-
#(str "/" artifact-id "/versions/" (:version %)))
119-
jars))]
120-
(if (= group-id artifact-id)
121-
artifact-links
122-
(map #(str "/" group-id %) artifact-links)))))
123-
(all-grouped-jars db))
124-
125-
user-links (into []
126-
(map :user)
127-
(db/all-users db))
128-
129-
all-links (concat base-links
130-
group-links
131-
artifact-links
132-
user-links)]
133-
(map #(str base-url %) all-links)))
134-
135-
(defn write-sitemap
136-
"returns sitemap filename"
137-
[dest index links]
138-
(let [sitemap-file (str dest "/sitemap-" index ".xml")
139-
sitemap-file-gz (str sitemap-file ".gz")
140-
sitemap [::sitemap/urlset {:xmlns sitemap-ns}
141-
(for [link links]
142-
[::sitemap/url
143-
[::sitemap/loc link]])]
144-
data [(xml/sexp-as-element sitemap)]]
145-
[(write-to-file data
146-
sitemap-file
147-
nil
148-
#(xml/emit % *out*))
149-
(write-to-file data
150-
sitemap-file-gz
151-
:gzip
152-
#(xml/emit % *out*))]))
153-
154-
(defn write-sitemap-index
155-
"returns sitemap index filename"
156-
[base-url dest sitemap-files]
157-
(let [sitemap-index-file (str dest "/sitemap.xml")
158-
sitemap-index-file-gz (str sitemap-index-file ".gz")
159-
sitemap-index [::sitemap/sitemapindex {:xmlns sitemap-ns}
160-
(for [[sitemap-file _] sitemap-files]
161-
[::sitemap/sitemap
162-
[::sitemap/loc (str base-url "/" sitemap-file)]])]
163-
data [(xml/sexp-as-element sitemap-index)]]
164-
[(write-to-file data
165-
sitemap-index-file
166-
nil
167-
#(xml/emit % *out*))
168-
(write-to-file data
169-
sitemap-index-file-gz
170-
:gzip
171-
#(xml/emit % *out*))]))
172-
173-
(defn generate-sitemaps
174-
"base-url - without the trailing slash"
175-
[base-url dest db]
176-
(let [sitemap-files (->> (links-list base-url db)
177-
(partition-all 50000)
178-
(map-indexed #(write-sitemap dest %1 %2))
179-
((juxt #(write-sitemap-index base-url dest %) identity))
180-
(apply concat)
181-
(apply list*))
182-
checksum-files (mapcat write-sums sitemap-files)]
183-
(concat sitemap-files checksum-files)))
184-
18587
(defn put-files [s3-bucket & files]
18688
(run! #(let [f (io/file %)]
18789
(s3/put-file s3-bucket (.getName f) f {:ACL "public-read"}))
18890
files))
18991

190-
(defn generate-feeds [dest base-url db s3-bucket]
92+
(defn generate-feeds [dest db s3-bucket]
19193
(let [feed-file (str dest "/feed.clj.gz")]
19294
(apply put-files
19395
s3-bucket
@@ -214,15 +116,10 @@
214116
(write-to-file jars gz-file :gzip)
215117
(concat
216118
(write-sums jar-file)
217-
(write-sums gz-file))))
218-
219-
(apply put-files
220-
s3-bucket
221-
(generate-sitemaps base-url dest db)))
119+
(write-sums gz-file)))))
222120

223121
(defn -main [feed-dir env]
224-
(let [{:keys [db s3 base-url]} (config (keyword env))]
122+
(let [{:keys [db s3]} (config (keyword env))]
225123
(generate-feeds feed-dir
226-
base-url
227124
db
228125
(s3/s3-client (:repo-bucket s3)))))

src/clojars/web/common.clj

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,6 @@
4141
(link-to "/dmca" "DMCA")
4242
(link-to "https://github.com/clojars/clojars-web/wiki/" "help")
4343
(link-to "https://github.com/clojars/clojars-web/wiki/Data" "API")
44-
(link-to "/repo/sitemap.xml" "sitemap")
4544
[:div.sponsors
4645
[:div.sponsors-title
4746
"Hosting costs sponsored by:"]

test/clojars/unit/tools/generate_feeds_test.clj

Lines changed: 2 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -6,15 +6,12 @@
66
[clojars.s3 :as s3]
77
[clojars.test-helper :as help]
88
[clojars.tools.generate-feeds :as feeds]
9-
[clojure.data.xml :as xml]
109
[clojure.java.io :as io]
1110
[clojure.string :as str]
1211
[clojure.test :refer [deftest is use-fixtures]]
1312
[matcher-combinators.matchers :as m]
1413
[matcher-combinators.test])
1514
(:import
16-
(java.io
17-
File)
1815
(java.util
1916
Date)
2017
(java.util.zip
@@ -116,7 +113,7 @@
116113
(is (s3/object-exists? cf (str name ".sha1")))))
117114

118115
(deftest the-whole-enchilada
119-
(feeds/generate-feeds "/tmp" "http://example.org" help/*db* help/*s3-repo-bucket*)
116+
(feeds/generate-feeds "/tmp" help/*db* help/*s3-repo-bucket*)
120117
(let [feed-file (io/file "/tmp" "feed.clj.gz")]
121118
(verify-file-and-sums feed-file)
122119
(verify-s3 help/*s3-repo-bucket* feed-file)
@@ -155,26 +152,4 @@
155152
(slurp)
156153
(format "[%s]")
157154
(read-string))]
158-
(is (= expected-jar-list read-jars))))
159-
160-
(let [sitemap-index-file (io/file "/tmp" "sitemap-index.xml.gz")
161-
sitemap-file (io/file "/tmp" "sitemap-0.xml.gz")]
162-
(verify-file-and-sums sitemap-index-file)
163-
(verify-file-and-sums sitemap-file)
164-
(with-open [in (io/input-stream sitemap-index-file)]
165-
(let [sitemap-index (-> in (GZIPInputStream.) (xml/parse))
166-
first-sitemap (-> sitemap-index :content first)
167-
first-loc (-> first-sitemap :content first)]
168-
(is (= "sitemapindex" (name (:tag sitemap-index))))
169-
(is (some? first-sitemap))
170-
(is (= "sitemap" (name (:tag first-sitemap))))
171-
(is (some? first-loc))
172-
(is (= "loc" (name (:tag first-loc))))
173-
(is (str/ends-with? (->> first-loc :content first) (File/.getPath sitemap-file)))))
174-
(with-open [in (io/input-stream sitemap-file)]
175-
(let [sitemap (-> in (GZIPInputStream.) (xml/parse))
176-
first-url (-> sitemap :content first)
177-
first-loc (-> first-url :content first)]
178-
(is (= "urlset" (-> sitemap :tag name)))
179-
(is (= "url" (-> first-url :tag name)))
180-
(is (= "loc" (-> first-loc :tag name)))))))
155+
(is (= expected-jar-list read-jars)))))

0 commit comments

Comments
 (0)