Python way to Download all the ASKTOM and Oracle MAG Posted by Connor McDonald at Linked In Group

Posted by Sriram Sanka on November 8, 2022

There is a Group Post By Connor on LinkedIn in Oracle Senior DBA Group, showing the links to access ASKTOM Best Posts and Oracle Magazines from https://asktom.oracle.com/pls/apex/f?p=100:9

Here is the Code Snippet that helps you to download all the Posts and Magazines as HTML files as your choice of Destination in your local file system .

Snippet To Download TOM KYTE Posts

import requests
from bs4 import BeautifulSoup
import string
import os
import urllib.request, urllib.error, urllib.parse
import sys

def Download_ASKTOM_files(path,url,enc,title):
    try:                
        response = urllib.request.urlopen(url)
        webContent = response.read().decode(enc)
        os.makedirs(path+'\\'+ 'ASKTOM', exist_ok=True)
        n=os.path.join(path+'\\'+ 'ASKTOM',title +'.html')
        f = open(n, 'w',encoding=enc)
        f.write(webContent)
        f.close
    except:
        n1=os.path.join(path+'\\'+  'ASKTOM_'+'Download_Error.log')
        f1 = open(n1, 'w',encoding=enc) 
        f1.write(url)
        f1.close
reqs = requests.get("https://asktom.oracle.com/tomkyte-blog.htm")
soup = BeautifulSoup(reqs.text, 'html.parser')
for link2 in soup.select(" a[href]"):
    src=link2["href"]
    durl='https://asktom.oracle.com/'+src
    tit =link2.get_text().replace(string.punctuation, " ").translate(str.maketrans('', '', string.punctuation))
    print(tit.replace(" ","_"),durl)
    Download_ASKTOM_files("c:\\Users\\....\\Downloads\\blogs\\",durl,'UTF-8',tit.replace(" ","_"))

Snippet to Download Magazines

import requests
from bs4 import BeautifulSoup
import string
import os
import urllib.request, urllib.error, urllib.parse
import sys

def Download_ASKTOM_files(path,url,enc,title):
    try:                
        response = urllib.request.urlopen(url)
        webContent = response.read().decode(enc)
        os.makedirs(path+'\\'+ 'ASKTOM_MAG', exist_ok=True)
        n=os.path.join(path+'\\'+ 'ASKTOM_MAG',title +'.html')
        f = open(n, 'w',encoding=enc)
        f.write(webContent)
        f.close
    except:
        n1=os.path.join(path+'\\'+  'ASKTOM_MAG_'+'Download_Error.log')
        f1 = open(n1, 'w',encoding=enc) 
        f1.write(url)
        f1.close
reqs = requests.get("https://asktom.oracle.com/magazine-archive.htm")
soup = BeautifulSoup(reqs.text, 'html.parser')
for link2 in soup.select(" a[href]"):
    src=link2["href"]
    durl='https://asktom.oracle.com/'+src
    tit =link2.get_text().replace(string.punctuation, " ").translate(str.maketrans('', '', string.punctuation))
    print(tit.replace(" ","_"),durl)
    Download_ASKTOM_files("c:\\Users\\......\\Downloads\\blogs\\",durl,'UTF-8',tit.replace(" ","_"))

Hope you liked it 🙂

This entry was posted on November 8, 2022 at 12:12 pm and is filed under ASKTOM, CONNOR, Python, TOMKYTE. Tagged: ASKTOM, CONNOR, DOWNLOAD, Python, TOMKYTE. You can follow any responses to this entry through the RSS 2.0 feed. You can leave a response, or trackback from your own site.

Not Just Databases

Moderator at

Member at

Subscribe

Categories

Archives

Visits n Views

Follow Blog via Email

Total Views

$riram $anka

Python way to Download all the ASKTOM and Oracle MAG Posted by Connor McDonald at Linked In Group

Leave a comment Cancel reply

Not Just Databases

Moderator at

Member at

Subscribe

Categories

Archives

Visits n Views

Follow Blog via Email

Total Views

$riram $anka

Python way to Download all the ASKTOM and Oracle MAG Posted by Connor McDonald at Linked In Group

Share this

Related

Leave a comment Cancel reply