diff --git a/.capt/hooks/commit-msg b/.capt/hooks/commit-msg deleted file mode 100755 index 1b5edbd..0000000 --- a/.capt/hooks/commit-msg +++ /dev/null @@ -1 +0,0 @@ -capt $(basename $0) $@ diff --git a/.capt/hooks/post-checkout b/.capt/hooks/post-checkout deleted file mode 100755 index 1b5edbd..0000000 --- a/.capt/hooks/post-checkout +++ /dev/null @@ -1 +0,0 @@ -capt $(basename $0) $@ diff --git a/.capt/hooks/post-commit b/.capt/hooks/post-commit deleted file mode 100755 index 1b5edbd..0000000 --- a/.capt/hooks/post-commit +++ /dev/null @@ -1 +0,0 @@ -capt $(basename $0) $@ diff --git a/.capt/hooks/post-rewrite b/.capt/hooks/post-rewrite deleted file mode 100755 index 1b5edbd..0000000 --- a/.capt/hooks/post-rewrite +++ /dev/null @@ -1 +0,0 @@ -capt $(basename $0) $@ diff --git a/.capt/hooks/pre-commit b/.capt/hooks/pre-commit deleted file mode 100755 index 1b5edbd..0000000 --- a/.capt/hooks/pre-commit +++ /dev/null @@ -1 +0,0 @@ -capt $(basename $0) $@ diff --git a/.capt/hooks/pre-push b/.capt/hooks/pre-push deleted file mode 100755 index 1b5edbd..0000000 --- a/.capt/hooks/pre-push +++ /dev/null @@ -1 +0,0 @@ -capt $(basename $0) $@ diff --git a/.capt/hooks/prepare-commit-msg b/.capt/hooks/prepare-commit-msg deleted file mode 100755 index 1b5edbd..0000000 --- a/.capt/hooks/prepare-commit-msg +++ /dev/null @@ -1 +0,0 @@ -capt $(basename $0) $@ diff --git a/.capt/share.sh b/.capt/share.sh deleted file mode 100644 index c776d89..0000000 --- a/.capt/share.sh +++ /dev/null @@ -1,16 +0,0 @@ -# Captain git-hook manager control file - -pre_commit=( - 'hithere: echo ## just print changed files' - # mdlint - cljfmt - 'cljlint(dbdoc)' # won't work since no longer a .clj file - fixcheck - wscheck -) - -commit_msg=( msglint ) - -prepare_commit_msg=( br2msg ) - -post_commit=( colorquote ) diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 81ffd55..0000000 --- a/.gitignore +++ /dev/null @@ -1 +0,0 @@ -resources/migrations/* diff --git a/README.org b/README.org deleted file mode 100644 index 5d98327..0000000 --- a/README.org +++ /dev/null @@ -1,400 +0,0 @@ -#+Title: DBDoc (Database Schema Documenter) - -Document your database schema (tables and columns), because your team will -thank you, and this makes it easy. You really need a [[https://en.wikipedia.org/wiki/Data_dictionary][Data Dictionary]] to -describe your database. Your team will be living in chaos until you have one. -DBDoc gives you a simple one that everyone can easily work with. - -DBDoc enables you to describe (via generated =COMMENT ON= statements) your -relational database schema(s) in a simple text file, which is easy for -developers/DBAs to edit and search in the repo. The docs (each a snippet of a -sentence or a few, all in a single, versatile =dbdoc.org= file) are then: - -- *viewable in an SQL client* like DBeaver or Datagrip (the main use - case) as tooltips and in other views - -- *greppable* in your code base - -- *web-publishable as docs*, enabling other - stakeholders (eg, Product people) to view DB documentation (in - Confluence or wherever) - -- *presentable* one table at a time for brainstorming, explaining, etc, - with an org presenter, like [[https://github.com/eschulte/epresent][epresent]] or [[https://github.com/takaxp/org-tree-slide][org-tree-slide]] (screenshot - at bottom) - -It works by running a simple =dbdoc= script to convert a very minimal -ORG-valid and -prescribed syntax (top-level bullets, single paragraphs, and -definition lists) into (long) [[https://www.postgresql.org/docs/current/sql-comment.html][SQL =COMMENT ON= statements]], which can be run -automatically on your database (via migration or sourced or however you like). - -#+html:

-#+caption: DBeaver showing a hover on column/heading - -_(See far below for more screenshots.)_ - -** Changelog - -*** *2023-09-12* (all postgres-only) -- Support non-=public= schemas (use =someschema.sometable= for top-level bullet) -- Round-tripper: see data in DB not in ORG file -- Round-tripper: see conflicts between DB and ORG file -- Error checking for duplicate tables and fields in ORG file (common when - pasting from round-tripping) - -** Example dbdoc.org file - -The following shows an example =dbdoc.org= file describing a movie store -rental database and a few of its tables: =film= (containing =title= -and =description= columns), =movie= (a deprecated table with no -columns documented), and =actor=. Notice: the hyphens instead of -underscores, newlines before definitions, other indentation. - -An example translation then is from: - -#+begin_src org -#+Title: Pagila Movie Store Rental Database - -This is the "dbdoc" description file for the Pagila database. See the -[[https://github.com/micahelliott/dbdoc][dbdoc README]] -for more detailed instructions on its purpose and expanding it. This file -contains short documentation for any tables and columns that could use -even the slightest bit of explanation. - -Edit this file whenever you make schema changes. And be a good citizen -by helping to grow this file any time you're touching a table! -The remainder of this file will be used processed into comment -descriptions that will be visible in your SQL client, and can also be -exported as HTML. - -* FILM - -A film, aka movie, is released initially in theaters, and then -available to movie /stores/, at which point they become available to -the DB. - -- title :: - The full name of the film, including things like sub-title and part - in a series; does not include language - -- description :: - A brief synopsis (catchy prose) about the plot - -* MOVIE - -DEPRECATED: replaced by =film= - -* ACTOR - -An actor is very simple and non-comprehensive table to record the main -headlining /stars/ of the film. All fields are obvious. Note that -there may be duplicate actors that use slightly different names on occasion. -#+end_src - -to an SQL migration file containing: - -#+begin_src sql -COMMENT ON TABLE film IS 'A film, aka movie …'; -COMMENT ON COLUMN film.title IS 'The full name …'; -… -COMMENT ON TABLE movie IS 'DEPRECATED: replaced …'; -… -#+end_src - -Compared to the ORG version, that SQL is pretty ugly – editing -(quoting, line-length/newlines, indentation, formatting) becomes quite -difficult. That’s why this tiny tool exists. - -There is a testable =docs/dbdoc.org= example (and its generated SQL -migration file =resources/migrations/20210804162056-dbdoc.up.sql=) in -this repo that was written to minimally describe the [[https://github.com/devrimgunduz/pagila][pagila toy -database]]. Just run =dbdoc= in the root of this repo to try it out! - -** Installation - -- Install [[https://github.com/babashka/babashka#installation][Babashka]] (any OS, tiny, fast, no dependencies). -- Clone this repo and put its root on your =PATH=. - -OR - -- Use [[https://github.com/babashka/bbin][bbin]] to install `dbdoc` as an - executable into =~/.local/bin=: - - #+begin_src shell - bbin install io.github.micahelliott/dbdoc - #+end_src - -Now you're ready to run =dbdoc= from anywhere, and that's all -there is to it! Not even any CLI options. :) - -** Documentation Process (Usage) - -*** One time only - -- Create a single living .org file in your repo, eg, =docs/dbdoc.org= - for growing docs for your tables. - -- Assuming you haven't already somehow written a =COMMENT= for your - DB, turn a SME analyst type or long-time developer or DBA in your - company loose to write up a bunch of notes in the org file. Then - edit a bit to ensure it's valid ORG that DBDoc can handle.. - -- Set up env vars to change default file locations (optional, not well - tested): - - #+begin_src shell - export DBDOC_ORG=docs/dbdoc.org - export DBDOC_SQL=resouces/migrations/-dbdoc.up.sql - export DBDOC_HTML=docs/dbdoc.html - #+end_src - -*** Continually (this is the only real process) - -1. Keep describing as many tables and columns as you see fit in your - =docs/dbdoc.org= file. Every time a developer changes or adds a - field or table, they also should put a sentence or two describing its - purpose in the org file. - -2. Run =dbdoc= to generate a time-stamped file like - =resources/migrations/20201027000000-dbdoc.up.sql=. IMPORTANT!! - Don't forget this step! (You don't need all the developers on the - teams do this, so long as /someone/ does the generation/migrating - once in a while.) - -3. Commit both the org and migration files. - -*** Optional - -- Generate HTML (from command line [[https://pandoc.org/][with Pandoc]] or [[https://stackoverflow.com/a/22091045/326516][Emacs]]) and publish - the new version to some site your company views (optional, see - =org2conflu.zsh= script). - -- If your migrations aren't automatic as part of your CI, run your - migration (or just load the new SQL file if you don't do - migrations). - -** Table Documentation Best Practices - -- Don’t need to be comprehensive and document every field when names - make them obvious -- Add an example datum for a column -- Used-by references: other tables (probably not FKs) and code areas -- Gotchas/quirks -- Add characteristic tags: deprecated/defunct, xl, hot, new, static, - performance, donttouch, dragons - -** Showing Comments in Clients - -- psql: =\d+= -- [[https://dataedo.com/kb/tools/dbeaver/how-to-view-and-edit-table-and-column-comments][dbeaver]] (HIGHLY RECOMMENDED!! the docs pop up everywhere) -- [[https://eggerapps.at/postico/][postico]] (see the _Structure_ tab, as shown is screenshot) -- [[https://postgrest.org/en/v7.0.0/api.html#openapi-support][postgrest/swagger]] -- [[https://dataedo.com/kb/tools/pgadmin/how-to-view-and-edit-table-and-column-comments][pgadmin]] -- [[https://dataedo.com/kb/tools/datagrip/how-to-view-and-edit-table-and-column-comments][datagrip]] ([[https://stackoverflow.com/questions/66129447/how-to-show-column-and-table-comment-in-jetbrains-datagrip][how to enable]]) - -** Read on if you want more details... - -*** Transformations - -The parser is limited and rigid and wants to see a _table_ -description paragraph for every table you wish to document. So, if you -want to document some column in a table, you must also provide at -least a tidbit sentence for the table too. It's not a robust parser so -just be careful. Alignment/indentation is important too, so follow the -example format precisely – this is a tiny subset of actual org. - -Org uses underscores for italic, and it’s tedious enough to have to -wrap every DB entity in equals (+=+) in org to escape them, so they -should instead be documented with hyphens ( =-= ) (though this isn’t -required). IOW, all ORG hyphenated variables (eg, =my-var-name=) -become underscores in SQL (=my_var_name=). So prefer to use -=my-var-name= in the ORG description. - -It you use “straight” apostrophes ('), they’ll be converted to -curlies so as not to need SQL string escaping (and be prettier). - -*** Git Diffs - -The first version of your migration file is a direct mapping from =dbdoc.org=: -it contains a =COMMENT ON= for each description. Then each time you run -=dbdoc=, that migration file is maintained but renamed and always has a 1-to-1 -mapping of org descriptions to =COMMENT ON=. - -The =dbdoc= script looks for an old migration file called -=-dbdoc.up.sql= and renames it (via =git-move=) to a -present timestamp. This enables Git to see the the new migration as -simply a change from the last run, and so you can easily see the -before/after diff. This also saves on a clutter of generating a bunch -of extra migrations. - -*** Doc Coverage - -You can track progress of your documenting by noting how many tables -have or have not been covered. Use the =coverage.zsh= script to offer -a simple coverage report. - -*** Seeding an ORG doc file for first-time use - -You can create a listing of all existing tables as a starter -ORG file: see =schema2org.zsh=. Once created, you can just start -documenting! This is probably totally buggy; it's a tiny sed script -working off a pg-dump. - -This may be improved to populate with existing comment descriptions -to enable “round-tripping”. - -*** Round-Tripping (postgres only, for now) - -If you already have comments on your tables, you can pull them into your ORG -doc (semi-manually) to still get the benefits of shared editing/viewing. So if -some of your team happens to add comments (inside a client, or with =COMMENT -ON= statements) to your production DB (instead of the using dbdoc process), -/round-tripping/ ensures you never lose data, keeping your =dbcoc.org= as the -SPOT and synced with the DB. But encourage your teammates not to be writing -=COMMENT ON= statements and use dbdoc instead! - -To run the round-tripper, dbdoc needs access to an up-to-date, running DB -instance. Export the =PGDATABASE= env var to specify that DB. It will query -for all the descriptions and send them into a TSV =indb.tsv=. Then it converts -the existing =dbdoc.org= texts (as inorg.tsv) to be able to diff and determine -what's new. Run =roundtrip.zsh= to see it. - -#+begin_src shell -PGDATABASE=mydb roundtrip.zsh >>docs/dbdoc.org # careful here with the append! -#+end_src - -That output contains org formatted text. Rather than dbdoc trying to inject -the new text into your hand-crafted =dbdoc.org= doc, it simply prints the new -data in org-format to /stdout/, so that you can paste it into the appropriate -places in your =dbdoc.org= file (or just append it as per that example). It is -alphabetized, so simply appending may not be wanted if you're trying to keep -your =dbdoc.org= file sorted by table name. - -If there are conflicts (same field described in both ORG and DB), those are -WARNINGs printed to /stderr/, and you're expected to resolve and paste them into -your =dbdoc.org= file with the description you feel is most up-to-date. - -*** FAQs - -*Why use org instead of the more popular/common markdown?* - -ORG has definition lists which work great for column docs. For the -limited syntax that is DBDoc, org and md are effectively the same -(just use =*= for heading instead of =#=). - -But [[https://github.com/MicahElliott/dbdoc/issues/2][I will implement Markdown]] if anyone feels they need it. - -*Do I need Emacs to work with Org files?* - -No! Emacs is not required to for any part of DBDoc. Most common -editors have some proper way to work with Org. Even if yours doesn't, -just edit in plain text mode. - -*How far should I go with documenting my tables?* - -Not super far. See recommendations above. I like to limit column docs -to not more than a few sentences. A table doc can be a legthy -paragraph (only one!). Your source code docstrings are probably a -better place to get into the nitty gritty. - -*Why not just write the doc strings in SQL?* - -Then your editor would think you’re in SQL mode and wouldn’t do things -like spell-checking or nice formatting. Plus, using ORG gives you a -publishable HTML version of your docs. - -*Does this work for all databases?* - -It does work for many! It's been tested with PostgreSQL, and should -work with others too, such as: - -- [[https://www.cockroachlabs.com/docs/stable/comment-on.html][CockroachDB]] -- [[https://docs.oracle.com/cd/B19306_01/server.102/b14200/statements_4009.htm][Oracle]] -- [[https://www.ibm.com/support/producthub/db2/docs/content/SSEPGG_11.5.0/com.ibm.db2.luw.sql.ref.doc/doc/r0000901.html][IBM DB2]] -- [[https://docs.snowflake.com/en/sql-reference/sql/comment.html][Snowflake]] -- [[https://www.vertica.com/docs/9.2.x/HTML/Content/Authoring/SQLReferenceManual/Statements/COMMENT/COMMENTONTABLE.htm?tocpath=SQL%20Reference%20Manual%7CSQL%20Statements%7CCOMMENT%C2%A0ON%C2%A0Statements%7C_____9][Vertica]] - -[[https://issues.apache.org/jira/browse/DERBY-7008][Apache Derby may get support]]. - -[[https://stackoverflow.com/questions/7426205/sqlite-adding-comments-to-tables-and-columns][I don't think SQLite supports =COMMENT=.]] And [[https://stackoverflow.com/questions/2162420/alter-mysql-table-to-add-comments-on-columns][MySQL makes it very -difficult]] (and [[https://stackoverflow.com/questions/58665398/modifing-comment-into-spark-table-on-databricks][Spark]]) to the point that DBDoc won't attempt to make it -work. [[https://feedback.azure.com/forums/307516-sql-data-warehouse/suggestions/16317988-table-extended-properties][SQL Server/Azure is a fail too]]. And [[https://community.cloudera.com/t5/Support-Questions/Is-there-way-to-add-comment-to-a-phoenix-table/td-p/165405][Ignite]]. - -*How do I get this into Confluence without API access?* - -Your Confluence setup might only support creating a page from markdown -(not org or html). So you can use pandoc to convert from org to md -with: =pandoc -s docs/dbdoc.org -o temp.md= and then paste it into -Confluence from its "plus" menu while editing a page: -/Markup > Markdown > Paste > Insert/ - -*Why can't I use just my SQL client to add descriptive comments?* - -Because it seems wrong. Which copy of your DB are you wanting to -modify? Are you connecting your client to a production DB and making -edits to prod data? This doesn't make sense to me and I don't -understand why SQL clients support =COMMENT= editing. Developers, DBAs, -QA, and others may not have prod access, and probably all need -different non-prod DBs to have up-to-date documentation at their -fingertips, and DBDoc enables putting that documentation into every -instance. - -** Similar Tools Comparison - -*** dbdocs (same name but plural!) - -[[https://dbdocs.io/][dbdocs]] (plural) is decsribed as: "A free & simple tool to create -web-based database documentation using DSL code. Designed for -developers. Integrate seamlessly with your development workflow." As a -full DDL DSL, it is a much heavier commitment to incorporate. It also -creates a rich website for your tables, whereas /DBDoc/ just creates a -single webpage that can be synced with Confluence or published -wherever you choose. dbdocs creates ERDs, but /DBDoc/ lets capable -clients like DBeaver handle that for you. - -*** Rails ActiveRecord - -The [[https://github.com/rails/rails/pull/22911][ActiveRecord ORM]] has the ability to support comments as part of a -schema definition and migration syntax. You may not need DBDoc if -you're using AR. But if you want to publish your schema documentation, -you should still use DBDoc! - -*** Commercial Tools - -There are many DB documentation tools in this realm. For any use cases -I've encountered, they are overkill. But if you're interested in much -more sophisticated kitchen sink tools that may work with other types -of DBMSs, look into [[https://www.apexsql.com/sql-tools-doc.aspx][ApexSQL]], [[https://www.red-gate.com/products/sql-development/sql-doc/][Redgate]], and [[https://dataedo.com/][Dataedo]]. - -** Future Enhancements - -- Support =COMMENT ON DATABASE= as top-level paragraph (but ignore - myriad other types). *Actually, this can't be done flexibly since it - requires knowing the DB name.* - -- Identify fields/tables that are missing comments - -Please submit an issue if you think of any enhancements or find bugs. -I'm eager to improve this, but need your ideas! - -** More Screenshots - -Hover to see captions, just like in DB clients! There, you've been trained. - -#+html:

-#+caption: DBeaver properties view - -#+html:

-#+caption: Postico "structure" view with doc snippets in red - -#+html:

-#+caption: Datagrip tree table hover - -#+html:

-#+caption: Datagrip column hover - -#+html:

-#+caption: Datagrip tree view comments - -#+html:

-#+caption: Emacs Org slide presentation view diff --git a/_config.yml b/_config.yml new file mode 100644 index 0000000..c741881 --- /dev/null +++ b/_config.yml @@ -0,0 +1 @@ +theme: jekyll-theme-slate \ No newline at end of file diff --git a/analyze.zsh b/analyze.zsh deleted file mode 100644 index 2d35245..0000000 --- a/analyze.zsh +++ /dev/null @@ -1,20 +0,0 @@ -# See frequencies for table usage in TSV - -# Example: -# ... -# 15 rate_sheet_entry -# 16 project_root_doc_pkgs -# 24 rate_sheet -# 28 loan_type -# 36 capitalprovider -# 50 project_root - - -nqueries() { grep -EiI "(from|join) +.*\b${1}\b" *.sql | wc -l } -files() { grep -EiI "(from|join) +.*\b${1}\b" *.sql | cut -d: -f1 | sort -n | uniq } - -cd resources/sql -grep -E '^\* ' ../../docs/dbdoc.org | - gsed -r -e 's/^\* //' -e 's/-/_/g' | - while read -r table; do print "$(nqueries $table)\t$table"; done | - sort -n diff --git a/bb.edn b/bb.edn deleted file mode 100644 index cb4d332..0000000 --- a/bb.edn +++ /dev/null @@ -1 +0,0 @@ -{:bbin/bin {dbdoc {:main-opts ["-f" "dbdoc"]}}} diff --git a/coverage.zsh b/coverage.zsh deleted file mode 100755 index 907594c..0000000 --- a/coverage.zsh +++ /dev/null @@ -1,21 +0,0 @@ -#! /bin/zsh - -# TODO Annotate each missing table line with the git-blame of who (dev name) did the CREATE TABLE in migration files - -db=${DBDOC_DB?provide DB name as seed} - -# Array of public tables -tables=( $(rg 'CREATE TABLE public\.[A-z]+ ' $db | sed -r -e 's/CREATE TABLE public\.//' -e 's/ \(//') ) -tcovered=( $(rg 'COMMENT ON TABLE public\.[A-z]+ IS' $db | sed -r -e 's/COMMENT ON TABLE public\.//' -e "s/ IS '.*//") ) - -# print -l $tables -# print -l $tcovered - -integer ncovs=$(print -l $tcovered | wc -l) -integer ntabs=$(print -l $tables -l | wc -l) - -print "### Tables missing docs ($(( ntabs - ncovs )))" - -comm -23 <(print -l $tables | sort) <(print -l $tcovered | sort) - -printf "\n### Coverage: %0.2f%% (%d/%d)\n" $(( $ncovs / $ntabs. )) $ncovs $ntabs diff --git a/datagrip1.png b/datagrip1.png deleted file mode 100644 index 3cf81c8..0000000 Binary files a/datagrip1.png and /dev/null differ diff --git a/datagrip2.png b/datagrip2.png deleted file mode 100644 index ac6666f..0000000 Binary files a/datagrip2.png and /dev/null differ diff --git a/datagrip3.png b/datagrip3.png deleted file mode 100644 index 4ba80b7..0000000 Binary files a/datagrip3.png and /dev/null differ diff --git a/dbdoc b/dbdoc deleted file mode 100755 index 46f6488..0000000 --- a/dbdoc +++ /dev/null @@ -1,222 +0,0 @@ -#!/usr/bin/env bb - -;; Not working flexibly when classpath specified -;#!/usr/bin/env bb --classpath ...proj/dbdoc --main dbdoc - -;; Convert a simple ORG file into SQL COMMENTs for documenting tables -;; and their columns. -;; -;; Environmnent variables for controlling behavior: -;; - DBDOC_SQL -;; - DBDOC_ORG -;; - DBDOC_HTML - -(ns dbdoc - (:require - [babashka.fs :as fs] - [clojure.pprint :as pp] - [clojure.data.csv :as csv] - [clojure.java.shell :refer [sh]] - [clojure.java.io :as io] - [clojure.string :as str]) - (:import - (java.time.format DateTimeFormatter) - (java.time LocalDateTime))) ; not really needed since babashka - -;; https://stackoverflow.com/a/62970721/326516 -(def timestamp - "Migration-compatible timestamp (as seen in Migratus and elsewhere). - eg, `20210205111716`" - (.format (LocalDateTime/now) (DateTimeFormatter/ofPattern "yyyyMMddHHmmss"))) - -;; Default files for user to configure -(def sql-file "Output SQL (migration) file containing timestamp" - (or (System/getenv "DBDOC_SQL") - (format "resources/migrations/%s-dbdoc.up.sql" timestamp))) -(def mig-dir "Migration directory, for deletion of old migs" - (.getParent (io/file sql-file))) -(def org-file "Input file containing table/col descriptions" - (let [f (or (System/getenv "DBDOC_ORG") "docs/dbdoc.org")] - (if-not (fs/exists? f) - (do (println "ERROR: input org-file does not exist") - (System/exit 1)) - f))) -(def html-file "Output HTML file for publishing" - (or (System/getenv "DBDOC_HTML") "docs/dbdoc.html")) - -(def roundtrip? "Run in round-trip mode: only generate diff" - (atom false)) - - ; -;;; Translation - -(defn check-dir - "Check for existence of directory for file `f`." - [f] - ;; (println "Checking for existence of directory:" f) - (let [dir (.getParent (io/file f))] - (when-not (.isDirectory (io/file dir)) - (println "ERROR:" dir "directory does not exist.") - (System/exit 1)))) - -(defn xlate-col - "Construct a dotted `schema.table.column` name. - Replace hyphens w/ underscores, add `public` if was implicit." - [tab col] - (let [colname (-> (str tab "." (str/replace col #"(?s)^- (.*) ::.*" "$1")) - (str/replace #"-" "_")) - fieldsv (str/split colname #"\.") ; customer.active OR myschema.customer.active - fields (str/join \tab (if (= 2 (count fieldsv)) (into ["public"] fieldsv) fieldsv)) - text (-> (str/join " " (rest (str/split col #"\n"))) - str/trim - (str/replace #"(?s)\s+" " ") - (str/replace #"(?s)'" "’"))] - [colname fields text])) -;; (xlate-col "myschema.mytab" "- foo ::\n This is the description.") - -(defn filt-gt1 [fields] (seq (filter #(< 1 (second %)) (frequencies fields)))) - -(defn check-duplicate-fields [tabname fieldstrs] - (let [fields (reduce (fn [acc cur] (cons (first (xlate-col tabname cur)) acc)) [] fieldstrs)] - (when-let [gt1s (filt-gt1 fields)] (println "WARNING: duplicate fields:" gt1s)))) - -(defn process-col - "Convert a column description into a SQL COMMENT. - Write roundtrip TSV when in mode." - [tab col] - (let [[colname fields text] (xlate-col tab col) - cmt (format "COMMENT ON COLUMN %s IS '%s';\n--;;" colname text)] - (when @roundtrip? (.println *err* (format "%s\t%s" fields text))) - cmt)) - -(comment ; get fields for single section, for matching with roundtrip tsv - (def org (slurp "docs/dbdoc.org")) - (def sec (first (rest (str/split org #"(?m)^\* ")))) - (let [[tab _ & defs] (str/split sec #"\n\n")] - [tab (mapv #(apply str (drop 2 (first (str/split % #" ::")))) defs)]) - ;;=> ["film" ("title" "description" "original-language-id" "rental-rate" "length" "rating")] - :end) - -(defn xlate-section - "Convert a section of table descriptions and combine with columns." - [sec] - (let [[tabname tabdesc & cols] (str/split sec #"\n\n") - tabname (-> tabname - (str/replace #"(?s)\n" " ") - (str/replace #"-" "_")) - tabdesc (-> tabdesc - (str/replace #"(?s)\n" " ") - (str/replace #"(?s)'" "’") - str/trim)] - [tabname cols tabdesc])) - -(defn check-duplicate-sections [secstrs] - ;; TODO Inside the reduce fn is the place to check for dupe fields, if that's important enough - (let [secs (reduce (fn [acc cur] - (let [[tabname cols _] (xlate-section cur)] - (check-duplicate-fields tabname cols) - (cons tabname acc))) - [] secstrs)] - (when-let [gt1s (filt-gt1 secs)] (println "WARNING: duplicate tables:" gt1s)))) - -;; (seq (filter #(< 1 (second %)) (frequencies ["film" "rental" "actor" "fooschema.customer" "film"]))) -;; => {"film" 2, "rental" 1, "actor" 1, "fooschema.customer" 1} - -(defn process-section - "Create section COMMENTs." - [sec] - (let [[tabname cols tabdesc] (xlate-section sec) - ;; _ (println (format "%s\t%s" tabname tabdesc)) - cmt (format "COMMENT ON TABLE %s IS '%s';\n--;;" tabname tabdesc)] - (cons cmt (mapv #(process-col tabname %) cols)))) - -(defn move-old-migration - "Rename older migration file(s). - Assume was successful in creating a new mig." - [] - (when-let [old (first (fs/glob mig-dir "*-dbdoc.up.sql" {:max-depth 1}))] - (if (= 0 (:exit (sh "git" "ls-files" "--error-unmatch" (.toString old)))) - (do (println "Moving old COMMENTs mig via git: " (.toString old)) - (sh "git" "mv" (.toString old) sql-file)) - (println "WARNING: Detected old dbdoc file not in git:" (.toString old))))) - -(defn print-comments-file ; FIXME change name - "Write all SQL comments to `sql-file`." - [title comments] - (move-old-migration) - (println "To new SQL COMMENTs file: " sql-file) - (binding [*out* (io/writer sql-file)] - (println title) - (println "-- DO NOT EDIT THIS FILE; SEE dbdoc.org FILE") - ;; XXX No way to know the name of the database! - ;; Unless we look for env var like PGDATABASE - ;; (println (format "COMMENT ON DATABASE %s IS '%s';\n--;;" ??? dbdesc)) - ;; TODO Could wrap in a BEGIN/COMMIT transaction here, but some migrators - ;; do this automatically - (doseq [c comments] - (println) - (doseq [i c] - (println i))))) - - ; -;;; Roundtrip (none of these actually used; failed experiment) - -(defn recase [s] (str/replace s #"_" "-")) - -(defn rt-parse-rows - "Build up a sorted map in pretty org format for printing." - [rows] - (reduce (fn [acc [tname field desc]] - (let [tname (recase tname), field (recase field)] - (update acc tname conj (str "- " field " ::\n " desc "\n")))) - (sorted-map) - rows)) - -(defn rt-parse-tsv [] - (with-open [r (io/reader "dbdoc-public.tsv")] - (let [rows (csv/read-csv r :separator \tab)] - (prn (take 8 rows)) - (rt-parse-rows rows)))) - -(defn roundtrip [] - (println "Running round-trip. Paste the following into appropriate places in your dbdoc.org file.\n") - (mapv (fn [[table items]] - (println "*" table "\n") - (mapv println items)) - (rt-parse-tsv))) - - ; -;;; Main - -(defn genmig - "Normal main, generate a migration from dbdoc.org file." - [] - (check-dir sql-file) - (let [;; To test, set DBDOC_ORG=example.org - _ (println "Reading ORG descriptions input file:" org-file) - org (slurp org-file) - title (str/replace (first (str/split org #"\n\n+")) - #"#\+Title: " "-- ") - ;; dbdesc (str/replace (first (str/split org #"(?m)^\* ")) #"#\+Title: " "") - sections (rest (str/split org #"(?m)^\* ")) - ;; _ (pp/pprint sections), _ (println "\n\n") - _ (check-duplicate-sections sections) - comments (mapv process-section sections)] - (when-not @roundtrip? - (print-comments-file title comments)))) - -(defn usage [] - (println "DBDoc generates sql COMMENT statements from a dbdoc.org file.") - (println "Run with no arguments to generate a migration file.") - (println "Run with 'roundtrip' argument to print TSV for use with tsv2org script.")) - -(defn -main [& args] - (let [[arg0 arg1] *command-line-args*] - (if-not arg1 - (genmig) - (if (or (= arg1 "roundtrip") (System/getenv "DBDOC_ROUNDTRIP")) - (do (reset! roundtrip? true) (genmig)) - (do #_(println "arg1:" arg1) (usage)))))) - -;; (println "in dbdoc") -(-main) diff --git a/deps.edn b/deps.edn deleted file mode 100644 index 0967ef4..0000000 --- a/deps.edn +++ /dev/null @@ -1 +0,0 @@ -{} diff --git a/docs/dbdoc.org b/docs/dbdoc.org deleted file mode 100644 index 2bf69c9..0000000 --- a/docs/dbdoc.org +++ /dev/null @@ -1,68 +0,0 @@ -#+Title: Pagila DB Documentation - -Some description of whole db. Keep it to exactly one paragraph. - -* film - -A *film*, aka movie, is released initially in theaters, and then -available to movie /stores/, at which point they become available to -the DB. - -- title :: - The full name of the film, including things like sub-title and part - in a series; does not include language - -- description :: - A brief synopsis (catchy prose) about the plot - -- original-language-id :: - Spoken language, defaults to English - -- rental-rate :: - The price in US dollars and cents the /store/ charges for a - short-duration rental - -- length :: - Duration in minutes of the film; does not include any previews - -- rating :: - MPAA rating: G, PG, PG-13, R, etc - -* fooschema.customer - -A movie store patron. A customer visits the store to rent a movie. We -register them as a customer when they rent their first film. - -- activebool :: - A true value indicated that they have successfully rented at least - one movie. Also set to =false= after 3 years of not renting any - movie. - -- create-date :: - The date that the customer creates an account; may be different from - first rental date - -* actor - -An actor is very simple and non-comprehensive table to record the main -headlining /stars/ of the film. All fields are obvious. Note that -there may be duplicate actors that use slightly different names on occasion. - -* rental - -An instance of a film being rented from the /store/. Note that a -delinquent customer will not be allowed to create a new rental. #hot - -- return-date :: - The date that a film is returned to the store in a valid state, - which means properly rewound and in good working condition - -- rental-date :: - The date at which a customer rents a movie - -- return-date :: - This is a dupe field - -* actor - -This is a dupe table. diff --git a/example.org b/example.org deleted file mode 100644 index 908a63e..0000000 --- a/example.org +++ /dev/null @@ -1,30 +0,0 @@ -#+Title: ACME Corp Customer Database - -* customer - -The monster that holds the precious user profiles. Used by the traffic -light table to track which users … - -- email :: - The primary contact, taken from Salesforce’s - =Project.Primary_Contact=. This is redundant with our =lead.contacs= - data. - -- email-data-json :: - ... - -* ops-user - -DEPRECATED: replaced by =agent= - -* order - -Every purchase made by =customer=s through the old portal. Note that -new purchases by customer through all other systems is recorded in the -=order2= table! - -- foo :: - arst - -- bar :: - qwfp diff --git a/film-hover.png b/film-hover.png deleted file mode 100644 index 6ecd1dc..0000000 Binary files a/film-hover.png and /dev/null differ diff --git a/film-table.png b/film-table.png deleted file mode 100644 index fdc9f8d..0000000 Binary files a/film-table.png and /dev/null differ diff --git a/film-table2.png b/film-table2.png deleted file mode 100644 index 42f83d9..0000000 Binary files a/film-table2.png and /dev/null differ diff --git a/miglint b/miglint deleted file mode 100755 index ea75607..0000000 --- a/miglint +++ /dev/null @@ -1,44 +0,0 @@ -#! /bin/zsh - -# Check migration file for existence of unsavory COMMENT ON statement, and reject build - -# Add lines like this to a `build.yml` file for github actions: - -# checks: -# steps: -# - name: Check for and block any COMMENT ON statements in mig files, insist on dbdoc -# run: ./deploy/bin/miglint - -# TODO Move this script to captain: https://github.com/MicahElliott/captain -# This will enable detecting which mig file etc. - -# Reject migs that have COMMENT ON statements, since we want those in dbdoc instead - -# NOTE If you absolutely must add a comment on, you can bypass this check by -# naming your mig like `dbdocbypass-whatever...` - -echo 'Checking for comment on statements in any migs' - -# Match mig files that are not dbdoc migs -git fetch --quiet origin master:refs/remotes/origin/master -migs=( $(git diff --name-only origin/master HEAD | grep -v dbdoc | grep '\.up\.sql') ) - -if (( ${#migs[@]} == 0 )); then echo -e "No migs in this PR\nOK"; exit; fi - -culprits=( $(grep -Pilzo 'comment on ' $migs) ) - -if (( ${#culprits[@]} > 0 )) ; then - echo "ERROR!" - echo "ERROR! You added a COMMENT ON statement in a migration file." - echo "ERROR!" - echo -e "\nCULPRITS:\n$culprits" - echo -e "\nWe are now enforcing that DB documentation should be done by adding entries to the docs/dbdoc.org file." - echo 'Please remove the comment and add it instead to PROJROOT/docs/dbdoc.org' - echo 'DBDoc is a setup we use to maintain database documentation is an org-file,' - echo 'which can be shared elsewhere and easily referenced.' - echo 'More info: https://github.com/MicahElliott/dbdoc' - - exit 1 # block the build -else - echo "OK" -fi diff --git a/org-preso.png b/org-preso.png deleted file mode 100644 index 0409333..0000000 Binary files a/org-preso.png and /dev/null differ diff --git a/org2conflu.zsh b/org2conflu.zsh deleted file mode 100755 index 1f01408..0000000 --- a/org2conflu.zsh +++ /dev/null @@ -1,17 +0,0 @@ -#! /bin/zsh - -# Convert a dbdoc.org file to html and send to basic auth'd confluence -# https://developer.atlassian.com/server/confluence/confluence-rest-api-examples/#update-a-page - -user=${CONFLUENCE_USER?provide confluence username} -pswd=${CONFLUENCE_PASSWORD?provide confluence basic auth password} -endpoint=${CONFLUENCE_ENDPOINT?provide confluence URL endpooint} - -print "Generating HTML from dbdoc.org" -html=$(pandoc -s $docs/dbdoc.org) - -print "Sending dbdoc generated HTML to Confluence page DBDoc" -curl -u ${user}:${pswsd} -X PUT -H 'Content-Type: application/json' \ - -d '{"id":"3604482","type":"page", "title":"DBDoc","space":{"key":"TST"},"body":{"storage":{"value": '"$html"', "representation":"storage"}}, "version":{"number":2}}' \ - $endpoint -# http://localhost:8080/confluence/rest/api/content/3604482 diff --git a/postico.png b/postico.png deleted file mode 100644 index 7d1d7bd..0000000 Binary files a/postico.png and /dev/null differ diff --git a/resources/migrations/20241231132314-dbdoc.up.sql b/resources/migrations/20241231132314-dbdoc.up.sql deleted file mode 100644 index 4dca735..0000000 --- a/resources/migrations/20241231132314-dbdoc.up.sql +++ /dev/null @@ -1,39 +0,0 @@ --- Pagila DB Documentation --- DO NOT EDIT THIS FILE; SEE dbdoc.org FILE - -COMMENT ON TABLE film IS 'A *film*, aka movie, is released initially in theaters, and then available to movie /stores/, at which point they become available to the DB.'; ---;; -COMMENT ON COLUMN film.title IS 'The full name of the film, including things like sub-title and part in a series; does not include language'; ---;; -COMMENT ON COLUMN film.description IS 'A brief synopsis (catchy prose) about the plot'; ---;; -COMMENT ON COLUMN film.original_language_id IS 'Spoken language, defaults to English'; ---;; -COMMENT ON COLUMN film.rental_rate IS 'The price in US dollars and cents the /store/ charges for a short-duration rental'; ---;; -COMMENT ON COLUMN film.length IS 'Duration in minutes of the film; does not include any previews'; ---;; -COMMENT ON COLUMN film.rating IS 'MPAA rating: G, PG, PG-13, R, etc'; ---;; - -COMMENT ON TABLE fooschema.customer IS 'A movie store patron. A customer visits the store to rent a movie. We register them as a customer when they rent their first film.'; ---;; -COMMENT ON COLUMN fooschema.customer.activebool IS 'A true value indicated that they have successfully rented at least one movie. Also set to =false= after 3 years of not renting any movie.'; ---;; -COMMENT ON COLUMN fooschema.customer.create_date IS 'The date that the customer creates an account; may be different from first rental date'; ---;; - -COMMENT ON TABLE actor IS 'An actor is very simple and non-comprehensive table to record the main headlining /stars/ of the film. All fields are obvious. Note that there may be duplicate actors that use slightly different names on occasion.'; ---;; - -COMMENT ON TABLE rental IS 'An instance of a film being rented from the /store/. Note that a delinquent customer will not be allowed to create a new rental. #hot'; ---;; -COMMENT ON COLUMN rental.return_date IS 'The date that a film is returned to the store in a valid state, which means properly rewound and in good working condition'; ---;; -COMMENT ON COLUMN rental.rental_date IS 'The date at which a customer rents a movie'; ---;; -COMMENT ON COLUMN rental.return_date IS 'This is a dupe field'; ---;; - -COMMENT ON TABLE actor IS 'This is a dupe table.'; ---;; diff --git a/resources/migrations/dbdocs.sql b/resources/migrations/dbdocs.sql deleted file mode 100644 index da6c594..0000000 --- a/resources/migrations/dbdocs.sql +++ /dev/null @@ -1,11 +0,0 @@ --- ACME Corp Customer Database - -COMMENT ON TABLE customer IS 'The monster that holds the precious user profiles. Used by the traffic light table to track which users …'; -COMMENT ON COLUMN customer.email IS 'The primary contact, taken from Salesforce’s =Project.Primary_Contact=. This is redundant with our =lead.contacs= data.'; -COMMENT ON COLUMN customer.email_data_json IS '...'; - -COMMENT ON TABLE ops-user IS 'DEPRECATED: replaced by =agent='; - -COMMENT ON TABLE order IS 'Every purchase made by =customer=s through the old portal. Note that new purchases by customer through all other systems is recorded in the =order2= table!'; -COMMENT ON COLUMN order.foo IS 'arst'; -COMMENT ON COLUMN order.bar IS 'qwfp'; diff --git a/roundtrip.clj b/roundtrip.clj deleted file mode 100755 index 061c0a1..0000000 --- a/roundtrip.clj +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env bb - -(ns roundtrip - (:require - [clojure.string :as str] - [clojure.pprint :as pp] - [clojure.java.io :as io] - [clojure.data.csv :as csv] - [dbdoc :as dbd])) - -(defn recase [s] (str/replace s #"_" "-")) - -(defn parse-rows - "Build up a sorted map in pretty org format for printing." - [rows] - (reduce (fn [acc [tname field desc]] - (let [tname (recase tname) field (recase field)] - (update acc tname conj (str "- " field " ::\n " desc "\n")))) - (sorted-map) - rows)) - -(defn parse-tsv [] - (with-open [r (io/reader "dbdoc-public.tsv")] - (let [rows (csv/read-csv r :separator \tab)] - (prn (take 8 rows)) - (parse-rows rows)))) - -(defn main [] - (mapv (fn [[table items]] - (println "*" table "\n") - (mapv println items)) - (parse-tsv))) - -(def xs - [["api_requests_log" "data" "EDN with request data"] - ["audit_trail_events" "change_type" "UPDATE, CREATE, etc"] - ["audit_trail_events" "commit_user" "sfdc user ID"] - ["audit_trail_events" "entity_name" "dsProject, dsAsset, etc"] - ["audit_trail_events" "json_data" "JSON containing more SF details of event"] - ["audit_trail_events" "record_id" "??"] - ["auth_password_reset" "complete" "boolean indicating if reset finished"] - ["auth_password_reset" "email" "user being reset"]]) -;; (pp/pprint (parse-rows xs)) diff --git a/roundtrip.zsh b/roundtrip.zsh deleted file mode 100755 index 368fcba..0000000 --- a/roundtrip.zsh +++ /dev/null @@ -1,37 +0,0 @@ -#! /bin/zsh - -# https://stackoverflow.com/a/4946306/326516 - -lines=$( psql -t -A -F' ' -c ' -select - c.table_schema, - c.table_name, - c.column_name, - pgd.description -from pg_catalog.pg_statio_all_tables as st -inner join pg_catalog.pg_description pgd on ( - pgd.objoid = st.relid -) -inner join information_schema.columns c on ( - pgd.objsubid = c.ordinal_position and - c.table_schema = st.schemaname and - c.table_name = st.relname -);' -) - -# Get db lines sorted into file -# print $lines | sort | grep '^public' | gcut -f2-4 >dbdoc-public.tsv -print $lines | sort >indb.tsv - -# Get doc lines sorted into file -dbdoc.clj roundtrip 2>&1 |sort >inorg.tsv - -# See just the entries unique to db -# Stupid mac really should be using gnu comm (gcomm), -comm -23 indb.tsv inorg.tsv >dbonly.tsv - -# Convert TSV lines into ORG format, print newly needed entries and conflicts -tsv2org - -# print '\nYou may want to delete the temp files now.' -# print ' rm indb.tsv inorg.tsv dbonly.tsv' diff --git a/schema2org.zsh b/schema2org.zsh deleted file mode 100755 index 304e818..0000000 --- a/schema2org.zsh +++ /dev/null @@ -1,25 +0,0 @@ -#! /bin/zsh - -# Convert a schema dump from SQL DDL to a seed of ORG for filling out docs - -db=${DBDOC_DB?provide DB name as seed} - -print "#+Title: $DBDOC_DB\n" - -print \ -"This is the dbdoc description file for the database. See the -[[https://github.com/micahelliott/dbdoc][dbdoc README]] -for more detailed instructions on its purpose and expanding it. This file -contains short documentation for any tables and columns that could use -even the slightest bit of explanation. - -Edit this file whenever you make schema changes. And be a good citizen -by helping to grow this file any time you're touching a table! -The remainder of this file will be used processed into comment -descriptions that will be visible in your SQL client, and can also be -exported as HTML. -" - -pg_dump -s $db | - sed -e '/^CREATE TABLE public\.[a-z_]* (/,/^)\;/!d' -e 's/CREATE TABLE public\.//' | - gsed -r -e 's/^ ([a-z_]+) .*/- \1 ::/' -e 's/ \($//' -e 's/\);//' -e 's/^([a-z]+)/* \1/' diff --git a/tsv2org b/tsv2org deleted file mode 100755 index 27fd496..0000000 --- a/tsv2org +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env bb - -(ns tsv2org - "Identify inconsistencies between DB and ORG file, and print pasteable entries for ORG. - - Called by `roundtrip.zsh` with pre-generated TSVs. - - This includes items missing from ORG, and items in both DB and ORG that have are different. - This situation often occurs when: - - someone on the team (or outside) has been adding COMMENT statements manually - - a DB you're getting started with already has existing COMMENTs - " - (:require - [clojure.string :as str] - [clojure.pprint :as pp] - [clojure.data :as cd] - [clojure.data.csv :as csv])) - -(def projdir "./") ; Maybe let this project directory be changeable. - -(defn camelize [s] (str/replace s #"_" "-")) -(defn read-tsv [fname] (csv/read-csv (slurp fname) :separator \tab)) - -(defn reshape - "Create a map of keys like `myschema.mytable.myfield` to field descriptions. - Drop the `public` prefix for schema since default." - [acc row] - (let [row (into (mapv camelize (take 3 row)) [(last row)]) - schema (if (= (first row) "public") (second row) (str/join \. (take 2 row)))] - ;; (prn [schema (drop 2 row)]) - ;; => ["credit-decisioning.applicant" ("created-at" "when this row was created; used for archival")] - (cons [schema (drop 2 row)] acc))) - -;; Translate the 4-col TSV to ORG, to stdout - -(defn print-missing-from-org - "Find fields present in DB that are missing from ORG file. - Requires that a `dbonly.tsv` file has already been created by upstream script." - [] - (let [rows (read-tsv (str projdir "dbonly.tsv")) - struct (reduce reshape (sorted-map) rows) - dict (reduce (fn [acc [tab data]] (update acc tab conj data)) (sorted-map) struct #_xs)] - ;; (pp/pprint (reduce reshape {} rows)) - (.println *err* "The following fields (sorted by table name, to stdout)") - (.println *err* " do not exist in your dbdoc.org file, or are in conflict (see stderr below).") - (.println *err* "You should paste them in from here.") - (mapv (fn [[tab pairs]] - (println (str "\n* " tab)) - (mapv (fn [[field desc]] (println (str "\n- " field " ::\n " desc))) pairs)) - dict))) - -(defn rows->map - "Convert TSV rows into a map keyed by first 3 cols." - [rows] - (reduce (fn [acc [scm tab fld desc]] - (assoc acc (str/join \. [scm tab fld]) desc)) - {} rows)) - -(defn print-conflicts-between-both - "Find fields present in both DB and ORG that have dissimilar descriptions." - [] - (let [;;dbmap {:a 11, :b 22, :c 33, :z "diff"} - ;; orgmap {:a 11, :b :whoa, :d 33, :z "alsodiff"} - dbmap (rows->map (read-tsv (str projdir "indb.tsv"))) - orgmap (rows->map (read-tsv (str projdir "inorg.tsv"))) - ;; Grab the third coll from diff, the matches - same-keys (nth (cd/diff (set (keys dbmap)) (set (keys orgmap))) 2) - db2 (select-keys dbmap same-keys) - org2 (select-keys orgmap same-keys)] - ;; (pp/pprint db2) (pp/pprint org2) - (mapv (fn [k] (let [dbk (get db2 k), orgk (get org2 k)] - (when (not= dbk orgk) - (.println *err* (format "\nCONFLICT FOR FIELD: %s\n db: %s\n org: %s" k dbk orgk))))) - same-keys))) - - ; -;;; Main - -(print-missing-from-org) -(print-conflicts-between-both) - - ; -;;; Experimenting - -(comment - (def xs [["credit-decisioning.applicant" - '("loanquote-data" "loanquote data used as input to the policies")] - ["credit-decisioning.applicant" - '("credit-facts" "facts from the credit reports used as input to the policies")] - ["credit-decisioning.boobs" - '("created-at" "when this row was created; used for archival")]]) - :end)