Parse HTML and build a map from the parsed value using clojure - html

I am using enlive clojure to parse HTML. My parser looks like;
;; Lazy seq with one entry per element of crawl-page: the nodes matched by the
;; selector [:td (attr= :class "bl_12")]. Elements with no match are dropped.
(def each-rows
  (->> crawl-page
       (map (fn [tr] (html/select tr [:td (attr= :class "bl_12")])))
       (filter seq)))
which extracts result as following;
{:tag :a,
:attrs
{:class "bl_12",
:href
"url1"},
:content ("Chapter 1")}
{:tag :a,
:attrs
{:class "bl_12",
:href
"url2"},
:content ("Chapter 2")}
{:tag :a,
:attrs
{:class "bl_12",
:href
"url3"},
:content ("Chapter 3")}
Now my objective is to get a dictionary like this;
{:Chapter_1 "url1"
:Chapter_2 "url2"
:Chapter_3 "url3"}
I managed to write a method which extracts only href or only content, but couldn't make it as a map
(defn read-specific-other
  "For each node in x, returns the second entry of the node's :attrs map."
  [x]
  (map #(second (:attrs %)) x))
output : [:href "url1"]
(defn read-specific-content
  "For each node in x, returns the first item of the node's :content.

  Uses the plain :content keyword. The auto-resolved ::content form expands to
  a namespace-qualified keyword that enlive nodes do not carry, so it would
  yield nil for every node."
  [x]
  (map (comp first :content) x))
(map read-specific-content each-rows)
output :
(("Chapter 1"
"Chapter 2"
"Chapter 3"
))
How do I get the desired result

Take a look at zipmap
(zipmap (read-specific-other each-rows) (read-specific-content each-rows))
If you really want the keys to be keywords, then use the keyword function; but I recommend keeping strings as the keys.
Also consider using an into for pattern instead:
;; Walk every node of every row and pair the node's first :content item
;; (the chapter title) with the :href from its :attrs, giving
;; {"Chapter 1" "url1", ...}.
(into {}
      (for [row each-rows
            {:keys [attrs content]} row]
        [(first content) (:href attrs)]))

Related

How can I prevent the closure compiler from minifying certain methods in clojurescript?

I'm integrating quilljs with my clojurescript application. I'm including it in my project.cljs file like so: [cljsjs/quill "1.3.5-0"].
The compiler is minifying some methods and is causing an error:
// Advanced-compilation output quoted from the question; it corresponds to the
// ClojureScript file-recieve-handler shown after it.
function xA(a, b) {
var c = t(a).getSelection(!0)
, d = c.index
, e = c.length
, h = Quill.import("delta");
c = function(b, c, d, e) {
return function(b) {
// .rf and .nf are the renamed .retain and .insert calls; .delete kept its name.
return t(a).updateContents((new e).rf(c).delete(d).nf({
image: b
}))
}
}(c, d, e, h);
return b.c ? b.c(c) : b.call(null, c)
}
This is the error: Uncaught TypeError: (intermediate value).rf is not a function
The clojurescript code looks like this:
(defn file-recieve-handler
  "Reads the current selection from the Quill instance held in the atom `this`,
  then calls `cb` with a one-argument function. Calling that function with a
  url applies a delta that retains `index`, deletes `length`, and inserts
  an {:image url} embed."
  [this cb]
  ;; `@this` derefs the atom; `#this` is not valid reader syntax.
  (let [range  (.getSelection @this true)
        index  (.-index range)
        length (.-length range)
        delta  (.import js/Quill "delta")]
    (cb (fn [url]
          (.updateContents @this
                           (.insert
                            (.delete
                             (.retain (new delta) index)
                             length)
                            #js {:image url}))))))
The retain method and the insert method are getting minified - and they shouldn't be. (The delete is not for some reason, I'm guessing that's because it's a keyword in javascript.)
I found the externs file for quilljs:
https://github.com/cljsjs/packages/blob/master/quill/resources/cljsjs/quill/common/quill.ext.js
Do I need to supplement the externs file somehow, or is there another way I can write the code, so that those two methods don't get minified when advanced compilation is turned on?
For some context below is the full file. It's based on this https://github.com/benhowell/reagent-quill/blob/master/quill.cljs
(ns quill.core
(:require
[reagent.core :as r]))
(defn quill-toolbar
  "Returns the hiccup for the Quill toolbar. The outer div's id is
  \"quill-toolbar-\" followed by `id`; every control group is a
  [:span {:class \"ql-formats\"} ...] holding selects or buttons."
  [id]
  (letfn [(group [& controls]
            (into [:span {:class "ql-formats"}] controls))
          (select [cls & options]
            (into [:select {:class cls}] options))
          (option
            ([] [:option])
            ([value] [:option {:value value}]))
          (button
            ([cls] [:button {:class cls}])
            ([cls value] [:button {:class cls :value value}]))]
    [:div {:id (str "quill-toolbar-" id)}
     (group (select "ql-header"
                    (option "1") (option "2") (option "3")
                    (option "4") (option "5") (option)))
     (group (select "ql-font"
                    (option) (option "serif") (option "monospace")))
     (group (select "ql-size"
                    (option "small") (option) (option "large") (option "huge")))
     (group (button "ql-bold")
            (button "ql-italic")
            (button "ql-underline")
            (button "ql-strike")
            (button "ql-blockquote"))
     (group (select "ql-align"))
     (group (button "ql-script" "sub")
            (button "ql-script" "super"))
     (group (button "ql-indent" "-1")
            (button "ql-indent" "+1"))
     (group (button "ql-image"))
     (group (select "ql-color")
            (select "ql-background"))
     (group (button "ql-clean"))]))
(defn file-recieve-handler
  "Reads the current selection from the Quill instance held in the atom `this`,
  then calls `cb` with a one-argument function. Calling that function with a
  url applies a delta that retains `index`, deletes `length`, and inserts
  an {:image url} embed."
  [this cb]
  ;; `@this` derefs the atom; `#this` is not valid reader syntax.
  (let [range  (.getSelection @this true)
        index  (.-index range)
        length (.-length range)
        delta  (.import js/Quill "delta")]
    (cb (fn [url]
          (.updateContents @this
                           (.insert
                            (.delete
                             (.retain (new delta) index)
                             length)
                            #js {:image url}))))))
(defn editor
  "Reagent class component that mounts a Quill editor.

  Props map keys:
    :id            keyword or string, used to build the element ids
    :value         HTML set as the initial inner HTML of the editor div
    :selection     nil, or a sequence whose first two items are passed to
                   .setSelection
    :on-change     called as (on-change source html) on Quill \"text-change\"
    :image-handler passed as `cb` to file-recieve-handler when the toolbar
                   image handler fires

  The Quill instance is kept in the local atom `this` and read with `@this`."
  [{:keys [id value selection on-change image-handler]}]
  (let [this (r/atom nil)
        get-value #(aget @this "container" "firstChild" "innerHTML")
        string-id (if (keyword? id) (name id) id)]
    (r/create-class
     {:component-did-mount
      (fn [component]
        ;; Child 0 of the rendered wrapper is the toolbar, child 1 the editor.
        (reset! this
                (js/Quill.
                 (aget (.-children (r/dom-node component)) 1)
                 #js {:modules #js {:toolbar (aget (.-children (r/dom-node component)) 0)}
                      :theme "snow"
                      :scrollingContainer (str "quill-wrapper-" string-id)
                      :placeholder "Compose an epic..."}))
        (.on @this "text-change"
             (fn [delta old-delta source]
               (on-change source (get-value))))
        ; FYI this is another area I had trouble. I got around it using
        ; get and set in the goog.object
        (let [toolbar (.getModule @this "toolbar")
              handlers (goog.object/get toolbar "handlers")]
          (goog.object/set handlers "image" #(file-recieve-handler this image-handler)))
        (if (= selection nil)
          (.setSelection @this nil)
          (.setSelection @this (first selection) (second selection) "api")))
      :component-will-receive-props
      (fn [component next-props]
        ;; Re-paste only when the incoming value differs from the editor
        ;; content or the component id changed.
        (when (or (not= (:value (second next-props)) (get-value))
                  (not= (:id (r/props component)) (:id (second next-props))))
          (if (= selection nil)
            (.setSelection @this nil)
            (.setSelection @this (first selection) (second selection) "api"))
          (.pasteHTML @this (:value (second next-props)))))
      :display-name (str "quill-editor-" string-id)
      :reagent-render
      (fn []
        [:div {:id (str "quill-wrapper-" string-id) :class "quill-wrapper"}
         [quill-toolbar string-id]
         [:div {:id (str "quill-editor-" string-id)
                :class "quill-editor"
                :dangerouslySetInnerHTML {:__html value}}]])})))
(defn display-area
  "Reagent class component that mounts a disabled Quill instance (toolbar
  off) on its own DOM node to display `content` as HTML.

  Props map keys:
    :id      used to build the element id and display name
    :content HTML set as the initial inner HTML; on new props the incoming
             :content is pasted into the Quill instance

  The Quill instance is kept in the local atom `this` and read with `@this`."
  [{:keys [id content]}]
  (let [this (r/atom nil)]
    (r/create-class
     {:component-did-mount
      (fn [component]
        (reset! this (js/Quill. (r/dom-node component)
                                #js {:theme "snow"
                                     :modules #js {:toolbar false}
                                     :placeholder ""}))
        (.disable @this))
      :component-will-receive-props
      (fn [component next-props]
        (.pasteHTML @this (:content (second next-props))))
      :display-name (str "quill-display-area-" id)
      :reagent-render
      (fn []
        [:div {:id (str "quill-display-area-" id)
               :class "quill-display-area"
               :dangerouslySetInnerHTML {:__html content}}])})))
You can turn on externs inference warnings and the compiler will tell you about things that are likely to be renamed.
;; in the actual ns
(set! *warn-on-infer* true)
;; in the build config compiler options
:infer-externs true
See https://clojurescript.org/guides/externs#externs-inference
To help debug issues with renaming you can turn on :pseudo-names true in the compiler options. That'll make it easier to figure out which methods get renamed and may need a ^js typehint or manual externs.
I used the type hints as @thomas-heller suggested and it worked. I broke the original function into two separate functions. Here it is re-written:
(defn add-image
  "Calls .updateContents on `quill` with `delta` after chaining
  .retain index, .delete length and .insert of an {:image url} object.
  The ^js hints on `quill` and `delta` are kept as in the original."
  [^js/Quill quill ^js/Quill.imports.delta delta index length url]
  (.updateContents quill
                   (-> delta
                       (.retain index)
                       (.delete length)
                       (.insert #js {:image url}))))
;https://github.com/quilljs/quill/pull/995/files#diff-6dafc0fe6b5e9aed0859eef541e68372
(defn file-recieve-handler
  "Reads the current selection of `quill`, creates a new delta from
  js/Quill's imports, and calls `cb` with a one-argument function that
  forwards a url to add-image together with the captured values."
  [^js/Quill quill cb]
  (let [selection (.getSelection quill true)
        index     (.-index selection)
        length    (.-length selection)
        delta     (new (.. js/Quill -imports -delta))]
    (cb #(add-image quill delta index length %))))

How to update an element in vector in atom state

I'm trying to create kind of todo list with ClojureScript and reagent framework. I defined app state as atom:
;; Application state: a reagent atom holding :count and :todolist, a vector of
;; todo maps with :id, :text and :finished keys.
(def app-state
(r/atom
{:count 3
:todolist
[{:id 0 :text "Start learning mindcontrol" :finished true}
{:id 1 :text "Read a book 'Debugging JS in IE11 without pain'" :finished false}
{:id 2 :text "Become invisible for a while" :finished false}]}))
Have a function to update todo list:
(defn update-todolist
  "Swaps app-state so that its :todolist becomes (apply f todolist args)."
  [f & args]
  (swap! app-state update-in [:todolist] #(apply f % args)))
And function toggle todo:
(defn toggle-todo
  "Sets :finished to true on the todo at index 2 of the list.
  The `todo` argument is not used; the index is hard-coded."
  [todo]
  (update-todolist assoc-in [2 :finished] true))
Here I'm updating vector element directly by its index right now.
I'm rendering every item with this function:
(defn item
  "Hiccup for one todo: its text plus a check icon whose class reflects
  :finished. Clicking the icon calls toggle-todo with the todo marked
  finished. The vector carries {:key id} as metadata."
  [todo]
  (let [{:keys [id text finished]} todo
        check-class (str "ti-check " (if finished "checked" "unchecked"))]
    (with-meta
      [:div
       [:span {:class "item-text"} text]
       [:i {:class check-class
            :on-click #(toggle-todo (assoc todo :finished true))}]]
      {:key id})))
Here I'm passing updated todo but it's not correct to pass always true. Probably it would be enough to pass its index and it will solve my problem, but I have no idea how to do this.
;; Application state: a reagent atom holding :count and :todolist, a vector of
;; todo maps with :id, :text and :finished keys.
(def app-state
(r/atom
{:count 3
:todolist
[{:id 0 :text "Start learning mindcontrol" :finished true}
{:id 1 :text "Read a book 'Debugging JS in IE11 without pain'" :finished false}
{:id 2 :text "Become invisible for a while" :finished false}]}))
(defn update-todolist
  "Swaps app-state so that its :todolist becomes (apply f todolist args)."
  [f & args]
  (swap! app-state update-in [:todolist] #(apply f % args)))
(defn toggle-todo
  "Flips :finished on every entry of :todolist whose :id equals (:id todo).

  The entry is located by its :id rather than by using the id as a vector
  index, so the toggle still hits the right todo after items are removed or
  reordered. An id that is not in the list leaves the state unchanged."
  [todo]
  (let [id (:id todo)]
    (swap! app-state update :todolist
           (fn [todos]
             ;; mapv keeps :todolist a vector.
             (mapv #(if (= id (:id %)) (update % :finished not) %)
                   todos)))))
(defn item
  "Hiccup for one todo: its text plus a check icon whose class reflects
  :finished. Clicking the icon calls toggle-todo with the todo.
  The vector carries {:key id} as metadata."
  [todo]
  (let [{:keys [id text finished]} todo
        check-class (str "ti-check " (if finished "checked" "unchecked"))]
    (with-meta
      [:div
       [:span {:class "item-text"} text]
       [:i {:class check-class
            :on-click #(toggle-todo todo)}]]
      {:key id})))
To toggle the value of the :finished key, just use not:
(swap! app-state update-in [:todolist 2 :finished] not) =>
{:count 3,
:todolist
[{:id 0, :text "Start learning mindcontrol",
:finished true}
{:id 1, :text "Read a book 'Debugging JS in IE11 without pain'",
:finished false}
{:id 2, :text "Become invisible for a while",
:finished true}]}
However, this does not tell you how the index 2 corresponds with the map that has :id 2 inside it.

re-frame: reset atom after dispatch

I have this form:
(defn input-question
  "Component with local state: a text input bound to the local ratom
  `new-question`, and a button that dispatches [:create-question <text>]
  with the ratom's current value."
  []
  (let [new-question (reagent/atom "")]
    (fn []
      [:div
       ;; `@new-question` derefs the ratom; `#new-question` is not valid
       ;; reader syntax.
       [:input {:type "text"
                :value @new-question
                :on-change #(reset! new-question (-> % .-target .-value))}]
       [:input {:type "button"
                :value "Save new question"
                :on-click #(re-frame.core/dispatch [:create-question @new-question])}]])))
How can I reset @new-question to "" (empty string) after the dispatch?
You can use reset! on the ratom after dispatching:
;; Dispatch the current value first, then clear the ratom.
#(do (re-frame.core/dispatch [:create-question @new-question])
     (reset! new-question ""))
to reset it after dispatching the value.
You probably want to review the re-frame effects docs:
https://github.com/Day8/re-frame/blob/master/docs/EffectfulHandlers.md
https://github.com/Day8/re-frame/blob/master/docs/Effects.md
Note that you can also use dispatch-n:
https://github.com/Day8/re-frame/blob/master/docs/API.md#dispatch-n
and you might want to use the fn syntax instead of the #(...) shorthand function syntax:
[:input {:type "button"
         :value "Save new question"
         ;; fn body: dispatch the current value, then clear the ratom.
         :on-click (fn []
                     (re-frame.core/dispatch [:create-question @new-question])
                     (reset! new-question ""))}]
You can also use both events and subs to keep as much logic out of your view code as possible. This means you will end up with many events and subs; however, this is by design and idiomatic to re-frame. It makes your re-frame code easier to understand, more decoupled, and more testable. Here is an example:
;; Effect handler for :save-question. The handler body is empty, so the
;; question passed to it is currently ignored (placeholder).
(rf/reg-fx
:save-question
(fn [question]))
;; Handle creating a question
;; Subscription: the value stored at [:new-question :value] in the app db.
(rf/reg-sub
:new-question-value
(fn [db _]
(get-in db [:new-question :value])))
;; Event: stores the event's value argument at [:new-question :value].
(rf/reg-event-db
:on-new-question-change
(fn [db [_ value]]
(assoc-in db [:new-question :value] value)))
;; Event: resets [:new-question :value] to "" in the db and emits the
;; :save-question effect with the value read before the reset.
(rf/reg-event-fx
:on-save-question-click
(fn [{:keys [db]} _]
{:db (assoc-in db [:new-question :value] "")
:save-question (get-in db [:new-question :value])}))
(defn input-question
  "Component whose text input shows the :new-question-value subscription.
  Typing dispatches :on-new-question-change with the input's value and the
  button dispatches :on-save-question-click."
  []
  (let [new-question-value (rf/subscribe [:new-question-value])
        on-save-question-click #(rf/dispatch [:on-save-question-click])
        on-new-question-change #(rf/dispatch [:on-new-question-change (.. % -target -value)])]
    (fn []
      [:div
       ;; `@new-question-value` derefs the subscription; `#new-question-value`
       ;; is not valid reader syntax.
       [:input {:type "text"
                :value @new-question-value
                :on-change on-new-question-change}]
       [:input {:type "button"
                :value "Save new question"
                :on-click on-save-question-click}]])))
Some extra notes about this code:
You should namespace your events and subs keys to prevent naming clashes
You should define a function and pass that into reg-fx, reg-event-db, reg-event-fx & reg-sub. Doing this can make the code more testable by allowing test code to call the function handler directly. However you can still test using Day8/re-frame-test but it's a little harder.

Reading Input With Om Next

I'm trying to understand how to read state from a text box in om.next. As I understand it, we are no longer bound/supposed to use core.async.
As a small example, consider writing in a textbox and binding it to a paragraph element, so that the text you enter automatically appears on the screen.
(def app-state (atom {:input-text "starting text"}))
(defn read-fn
  "Parser read function. Looks `key` up in the map held by the :state atom
  of `env` and returns {:value v} when the key is present (even if v is nil
  or false), otherwise {:value :not-found}. `params` is not used."
  [{:keys [state] :as env} key params]
  ;; `@state` derefs the atom; `#state` is not valid reader syntax.
  (let [st @state]
    (if-let [[_ v] (find st key)]
      {:value v}
      {:value :not-found})))
(defn mutate-fn
  "Parser mutate function. For the key 'update-text it returns a map with
  :value {:keys [:input-text]} and an :action thunk that stores `mytext`
  under :input-text in the :state atom; any other key gives
  {:value :not-found}."
  [{:keys [state]} key {:keys [mytext]}]
  (if-not (= key 'update-text)
    {:value :not-found}
    {:value {:keys [:input-text]}
     :action #(swap! state assoc :input-text mytext)}))
;; Root component: queries :input-text and renders a text input and a
;; paragraph that both display it.
(defui RootView
static om/IQuery
(query [_]
[:input-text])
Object
;; `_` is render's single argument; it is the value passed below to
;; om/props and om/transact!.
(render [_]
(let [{:keys [input-text]} (om/props _)]
(dom/div nil
(dom/input
#js {:id "mybox"
:type "text"
:value input-text
;; The transaction vector is quoted with ', so the (.-value ...) form inside
;; it is passed along as data and is not evaluated here.
:onChange #(om/transact! _ '[(update-text {:mytext (.-value (gdom/getElement "mybox"))})])
})
(dom/p nil input-text)))))
This doesn't work.
When firing the onChange event in the input form, the quoted expression does not grab the text from the box.
The first mutation fires and updates, but subsequent mutations are not fired. Even if the state doesn't change, shouldn't the query read the string from app-state and force the text to be the same?
I would make the :onChange event look like this:
;; Read the input's value when the event fires, then run the transaction
;; directly. ~v splices the value into the syntax-quoted vector; ~'update-text
;; keeps the mutation symbol unqualified so it still equals the 'update-text
;; that mutate-fn compares against.
:onChange (fn [_]
            (let [v (.-value (gdom/getElement "mybox"))]
              (om/transact! this `[(~'update-text {:mytext ~v})])))
Here the value v will actually be going through. But also om/transact! needs either a component or the reconciler to be passed as its first parameter. Here I'm passing in this which will be the root component.

clojure.data.json write/read affects enlive data

What is the appropriate json way to save and reload enlive's html-resource outputs.
The following procedure does not preserve the data structure (note that I ask json/read-str to map keys to keywords):
(require net.cgrand.enlive-html :as html)
(require clojure.data.json :as json)
(def craig-home
(html/html-resource (java.net.URL. "http://www.craigslist.org/about/sites")))
(spit "./data/test_json_flow.json" (json/write-str craig-home))
(def craig-reloaded
(json/read-str (slurp "./data/test_json_flow.json") :key-fn keyword))
(defn count-nodes
  "Number of nodes in `page` matched by the selector [:div.box :h4]."
  [page]
  (->> [:div.box :h4]
       (html/select page)
       count))
(println (count-nodes craig-home)) ;; => 140
(println (count-nodes craig-reloaded)) ;; => 0
Thanks.
UPDATE
To address Mark Fischer's comment I post a different code that address html/select instead of html/html-resource
(def craig-home
(html/html-resource (java.net.URL. "http://www.craigslist.org/about/sites")))
(def craig-boxes (html/select craig-home [:div.box]))
(count (html/select craig-boxes [:h4])) ;; => 140
(spit "./data/test_json_flow.json" (json/write-str craig-boxes))
(def craig-boxes-reloaded
(json/read-str (slurp "./data/test_json_flow.json") :key-fn keyword))
(count (html/select craig-boxes-reloaded [:h4])) ;; => 0
A simpler approach would be to write/read using Clojure edn:
(require '[net.cgrand.enlive-html :as html])
(require '[clojure.data.json :as json])
(def craig-home (html/html-resource (java.net.URL. "http://www.craigslist.org/about/sites")))
(spit "./data/test_json_flow.json" (pr-str craig-home))
(def craig-reloaded
(clojure.edn/read-string (slurp "./data/test_json_flow.json")))
(defn count-nodes
  "Number of nodes in `page` matched by the selector [:div.box :h4]."
  [page]
  (->> [:div.box :h4]
       (html/select page)
       count))
(println (count-nodes craig-home)) ;=>140
(println (count-nodes craig-reloaded)) ;=>140
Enlive expects the tag name value also to be a keyword and will not find a node if the tag name value is a string (which is what json/write-str and json/read-str converts keywords to).
(json/write-str '({:tag :h4, :attrs nil, :content ("Illinois")}))
;=> "[{\"tag\":\"h4\",\"attrs\":null,\"content\":[\"Illinois\"]}]"
(json/read-str (json/write-str '({:tag :h4, :attrs nil, :content ("Illinois")})) :key-fn keyword)
;=> [{:tag "h4", :attrs nil, :content ["Illinois"]}]
(pr-str '({:tag :h4 :attrs nil :content ("Illinois")}))
;=> "({:tag :h4, :attrs nil, :content (\"Illinois\")})"
(clojure.edn/read-string (pr-str '({:tag :h4, :attrs nil, :content ("Illinois")})))
;=> ({:tag :h4, :attrs nil, :content ("Illinois")})
If you must use json then you can use the following to convert the :tag values to keywords:
(clojure.walk/postwalk #(if-let [v (and (map? %) (:tag %))]
(assoc % :tag (keyword v)) %)
craig-reloaded)