From 884803a2dbece6c0bf5452f9c6e69ca6251fd73b Mon Sep 17 00:00:00 2001
From: kunal kumar barman
Date: Fri, 18 Oct 2019 07:04:34 +0530
Subject: [PATCH 01/10] Python program that crawls 3 sites at a time

Takes the search query as a command-line argument, e.g.: python3 project1.py man
---
 project1.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 project1.py

diff --git a/project1.py b/project1.py
new file mode 100644
index 000000000000..9264c74e8263
--- /dev/null
+++ b/project1.py
@@ -0,0 +1,21 @@
+from bs4 import BeautifulSoup
+import requests
+#from urllib.request import urlopen
+import webbrowser
+import sys
+from fake_useragent import UserAgent
+ua = {"UserAgent":"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0"}
+print("Googling.....")
+res=requests.get('https://www.google.com/search?q='+ ' '.join(sys.argv[1:]),headers=ua)
+#res.raise_for_status()
+file=open('project1a.html','wb') #only for knowing the class
+for i in res.iter_content(10000):
+    file.write(i)
+soup= BeautifulSoup(res.text,'lxml')
+linkele=soup.select('.eZt8xd')
+
+num=min(5,len(linkele))
+print(num)
+for i in range(num):
+
+    webbrowser.open('http://google.com' + linkele[i].get('href'))
\ No newline at end of file
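For readers skimming the diff above, a minimal sketch of what the soup.select('.eZt8xd') call is doing (the HTML fragment here is invented for illustration; .eZt8xd is simply the CSS class Google happened to put on result links at the time):

    from bs4 import BeautifulSoup

    # Parse a fragment and select every element carrying the class eZt8xd.
    html = '<div><a class="eZt8xd" href="/url?q=https://example.com">Example</a></div>'
    soup = BeautifulSoup(html, "html.parser")
    for link in soup.select(".eZt8xd"):
        print(link.get("href"))  # prints: /url?q=https://example.com

Each href is relative, which is why the script prepends http://google.com before handing it to webbrowser.open.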
From dfed24aa0b6c97c8fbc0e61d2d25511d7c2da2b0 Mon Sep 17 00:00:00 2001
From: kunal kumar barman
Date: Fri, 18 Oct 2019 21:30:39 +0530
Subject: [PATCH 02/10] Update project1.py

---
 project1.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/project1.py b/project1.py
index 9264c74e8263..45c1c49f9ba4 100644
--- a/project1.py
+++ b/project1.py
@@ -3,7 +3,7 @@
 #from urllib.request import urlopen
 import webbrowser
 import sys
-from fake_useragent import UserAgent
+from fake_useragent import UserAgent #This is needed for intialazing the User agent of the system otherwise it act like bot
 ua = {"UserAgent":"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0"}
 print("Googling.....")
 res=requests.get('https://www.google.com/search?q='+ ' '.join(sys.argv[1:]),headers=ua)
@@ -18,4 +18,4 @@
 print(num)
 for i in range(num):
 
-    webbrowser.open('http://google.com' + linkele[i].get('href'))
\ No newline at end of file
+    webbrowser.open('http://google.com' + linkele[i].get('href'))

From 5c1fbeaff3c0fb327af4ff6c0254ad6610375ad6 Mon Sep 17 00:00:00 2001
From: Christian Clauss
Date: Fri, 18 Oct 2019 21:32:33 +0200
Subject: [PATCH 03/10] noqa: F401 and reformat with black

---
 project1.py | 41 ++++++++++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/project1.py b/project1.py
index 45c1c49f9ba4..e7a77b643fe0 100644
--- a/project1.py
+++ b/project1.py
@@ -1,21 +1,28 @@
+import sys
+import webbrowser
+
 from bs4 import BeautifulSoup
 import requests
-#from urllib.request import urlopen
-import webbrowser
-import sys
-from fake_useragent import UserAgent #This is needed for intialazing the User agent of the system otherwise it act like bot
-ua = {"UserAgent":"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0"}
+
+# fake_useragent is needed for so that this script is not viewed as a bot
+from fake_useragent import UserAgent  # noqa: F401
+
+user_agent = {
+    "UserAgent": (
+        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:69.0) "
+        "Gecko/20100101 Firefox/69.0"
+    )
+}
 print("Googling.....")
-res=requests.get('https://www.google.com/search?q='+ ' '.join(sys.argv[1:]),headers=ua)
-#res.raise_for_status()
-file=open('project1a.html','wb') #only for knowing the class
-for i in res.iter_content(10000):
-    file.write(i)
-soup= BeautifulSoup(res.text,'lxml')
-linkele=soup.select('.eZt8xd')
+url = "https://www.google.com/search?q=" + " ".join(sys.argv[1:])
+res = requests.get(url, headers=user_agent)
+# res.raise_for_status()
+with open("project1a.html", "wb") as out_file:  # only for knowing the class
+    for data in res.iter_content(10000):
+        out_file.write(data)
+soup = BeautifulSoup(res.text, "lxml")
+links = list(soup.select(".eZt8xd"))[:5]
 
-num=min(5,len(linkele))
-print(num)
-for i in range(num):
-
-    webbrowser.open('http://google.com' + linkele[i].get('href'))
+print(len(links))
+for link in links:
+    webbrowser.open(f"http://google.com{links.get('href')}")

From 1125c44a971c76db3dd1faa72dcb166bff43e9e0 Mon Sep 17 00:00:00 2001
From: Christian Clauss
Date: Fri, 18 Oct 2019 21:38:40 +0200
Subject: [PATCH 04/10] Rename project1.py to
 web_programming/crawl_google_results.py

---
 project1.py => web_programming/crawl_google_results.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename project1.py => web_programming/crawl_google_results.py (100%)

diff --git a/project1.py b/web_programming/crawl_google_results.py
similarity index 100%
rename from project1.py
rename to web_programming/crawl_google_results.py

From 05808a2dc62a5b68e4106462beb52e186256261d Mon Sep 17 00:00:00 2001
From: Christian Clauss
Date: Fri, 18 Oct 2019 21:40:16 +0200
Subject: [PATCH 05/10] Add beautifulsoup4 to requirements.txt

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index f5790ad53c30..8305f55d923f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
+beautifulsoup4
 black
 flake8
 matplotlib

From 81106a61492b84dbda79c655d28da702d0b258e8 Mon Sep 17 00:00:00 2001
From: Christian Clauss
Date: Fri, 18 Oct 2019 21:45:23 +0200
Subject: [PATCH 06/10] Add fake_useragent to requirements.txt

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 8305f55d923f..357ce9598ca8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 beautifulsoup4
 black
+fake_useragent
 flake8
 matplotlib
 mypy

From 4600feab6bdb27558530330624192fe4f4c5a795 Mon Sep 17 00:00:00 2001
From: Christian Clauss
Date: Fri, 18 Oct 2019 21:47:09 +0200
Subject: [PATCH 07/10] Update crawl_google_results.py

---
 web_programming/crawl_google_results.py | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/web_programming/crawl_google_results.py b/web_programming/crawl_google_results.py
index e7a77b643fe0..7ef6a3e411f2 100644
--- a/web_programming/crawl_google_results.py
+++ b/web_programming/crawl_google_results.py
@@ -2,20 +2,12 @@
 import webbrowser
 
 from bs4 import BeautifulSoup
+from fake_useragent import UserAgent
 import requests
 
-# fake_useragent is needed for so that this script is not viewed as a bot
-from fake_useragent import UserAgent  # noqa: F401
-
-user_agent = {
-    "UserAgent": (
-        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:69.0) "
-        "Gecko/20100101 Firefox/69.0"
-    )
-}
 print("Googling.....")
 url = "https://www.google.com/search?q=" + " ".join(sys.argv[1:])
-res = requests.get(url, headers=user_agent)
+res = requests.get(url, headers=UserAgent().random)
 # res.raise_for_status()
 with open("project1a.html", "wb") as out_file:  # only for knowing the class
     for data in res.iter_content(10000):

From dd627aeb4a64354a6541691099dff3fedec07717 Mon Sep 17 00:00:00 2001
From: Christian Clauss
Date: Fri, 18 Oct 2019 23:14:25 +0200
Subject: [PATCH 08/10] headers={"UserAgent": UserAgent().random}

---
 web_programming/crawl_google_results.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web_programming/crawl_google_results.py b/web_programming/crawl_google_results.py
index 7ef6a3e411f2..8aefe525789c 100644
--- a/web_programming/crawl_google_results.py
+++ b/web_programming/crawl_google_results.py
@@ -7,7 +7,7 @@
 
 print("Googling.....")
 url = "https://www.google.com/search?q=" + " ".join(sys.argv[1:])
-res = requests.get(url, headers=UserAgent().random)
+res = requests.get(url, headers={"UserAgent": UserAgent().random})
 # res.raise_for_status()
 with open("project1a.html", "wb") as out_file:  # only for knowing the class
     for data in res.iter_content(10000):
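For context on this fix, a minimal sketch of the request as it stands after this patch (assuming the fake_useragent package is installed; requests expects headers to be a dict-like mapping, which is why the bare UserAgent().random string from the previous patch could not work):

    import requests
    from fake_useragent import UserAgent

    # UserAgent().random returns a randomized, realistic browser User-Agent
    # string on each access. The dict key "UserAgent" mirrors the patch;
    # note that the standard HTTP request header name is "User-Agent".
    headers = {"UserAgent": UserAgent().random}
    res = requests.get("https://www.google.com/search?q=algorithms", headers=headers)
    print(res.status_code)

The query "algorithms" here is just a placeholder; the script itself builds the query string from sys.argv.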
From 1b2eb0c368c4863c99c3309fe17b40d3cf38b6dc Mon Sep 17 00:00:00 2001
From: Christian Clauss
Date: Fri, 18 Oct 2019 23:20:58 +0200
Subject: [PATCH 09/10] html.parser, not lxml

---
 web_programming/crawl_google_results.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web_programming/crawl_google_results.py b/web_programming/crawl_google_results.py
index 8aefe525789c..7e8cb8564b1d 100644
--- a/web_programming/crawl_google_results.py
+++ b/web_programming/crawl_google_results.py
@@ -12,7 +12,7 @@
 with open("project1a.html", "wb") as out_file:  # only for knowing the class
     for data in res.iter_content(10000):
         out_file.write(data)
-soup = BeautifulSoup(res.text, "lxml")
+soup = BeautifulSoup(res.text, "html.parser")
 links = list(soup.select(".eZt8xd"))[:5]
 
 print(len(links))

From 188d1f4df73c63d799382e539cb97658ca660a82 Mon Sep 17 00:00:00 2001
From: Christian Clauss
Date: Fri, 18 Oct 2019 23:25:16 +0200
Subject: [PATCH 10/10] link, not links

---
 web_programming/crawl_google_results.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web_programming/crawl_google_results.py b/web_programming/crawl_google_results.py
index 7e8cb8564b1d..c31ec1526d3e 100644
--- a/web_programming/crawl_google_results.py
+++ b/web_programming/crawl_google_results.py
@@ -17,4 +17,4 @@
 
 print(len(links))
 for link in links:
-    webbrowser.open(f"http://google.com{links.get('href')}")
+    webbrowser.open(f"http://google.com{link.get('href')}")
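To try the finished script, a usage sketch (assumes beautifulsoup4, fake_useragent and requests are installed, and that Google still marks result links with the .eZt8xd class):

    python3 web_programming/crawl_google_results.py algorithms

This googles "algorithms", saves the raw results page to project1a.html, prints how many links were found (at most 5), and opens each one in the default browser via webbrowser.open.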