diff --git a/.obsidian/workspace.json b/.obsidian/workspace.json index e422773..5dbf3c8 100644 --- a/.obsidian/workspace.json +++ b/.obsidian/workspace.json @@ -7,28 +7,6 @@ "id": "b7dbd1bdd3ec0467", "type": "tabs", "children": [ - { - "id": "bf955a0f84453b93", - "type": "leaf", - "state": { - "type": "markdown", - "state": { - "file": "Weaviate.md", - "mode": "source", - "source": false - } - } - }, - { - "id": "a86e2cbe66021c26", - "type": "leaf", - "state": { - "type": "release-notes", - "state": { - "currentVersion": "1.5.3" - } - } - }, { "id": "f580618efb9aa5bc", "type": "leaf", @@ -41,8 +19,7 @@ } } } - ], - "currentTab": 2 + ] } ], "direction": "vertical" @@ -92,7 +69,8 @@ } ], "direction": "horizontal", - "width": 208.5 + "width": 208.5, + "collapsed": true }, "right": { "id": "36b6c2860c95bb3e", @@ -177,7 +155,8 @@ } ], "direction": "horizontal", - "width": 371.5 + "width": 371.5, + "collapsed": true }, "left-ribbon": { "hiddenItems": { @@ -190,7 +169,7 @@ "copilot:Copilot Chat": false } }, - "active": "f580618efb9aa5bc", + "active": "20bc2a10460c7a9a", "lastOpenFiles": [ "Corbia spider.md", "Docker.md", diff --git a/Corbia spider.md b/Corbia spider.md index e6dc35c..bb33c9a 100644 --- a/Corbia spider.md +++ b/Corbia spider.md @@ -45,5 +45,66 @@ def scrape_domain(self): return json.dumps({"success": False}) ``` +For each page that will be processed by the domain based function, the `BeautifulSoup` object is accessible via `self.soup` + +```html + + + + + Venmo truffaut shabby chic organic + + +
+ +
+

+ +

+
+ +
+

Venmo truffaut shabby chic organic

+ +
+

I'm baby wayfarers tote bag gochujang cred food truck VHS quinoa kogi Brooklyn yr vegan etsy.

+

Portland squid DSA, raclette flannel pinterest craft beer cloud bread pour-over same.

+

Air plant pickled man braid tilde drinking vinegar ascot DIY poke meditation iceland JOMO sustainable. Hell of tbh kombucha +1 listicle.

+
+
+ + + +
+ + +``` + +```python +def scrape_domain(self): + try: + title = self.soup.find('title') + data = { + "title" : title.text.strip(), + "success" : True + } + return json.dumps(data, indent=4, ensure_ascii=False) + except Exception as e: + return json.dumps({"success": False}) +``` +