google script:
# coding: utf-8
# In[ ]:
#Searching and Downloading Google Images/Image Links
#Import Libraries
#coding: UTF-8
import time #Importing the time library to check the time of code execution
import sys #Importing the System Library
import os
import urllib2
########### Edit From Here ###########
#This list is used to search keywords. You can edit this list to search for google images of your choice. You can simply add and remove elements of the list.
search_keyword = ['Australia']
#This list is used to further add suffix to your search term. Each element of the list will help you download 100 images. First element is blank which denotes that no suffix is added to the search keyword of the above list. You can edit the list by adding/deleting elements from it.So if the first element of the search_keyword is 'Australia' and the second element of keywords is 'high resolution', then it will search for 'Australia High Resolution'
keywords = [' high resolution']
########### End of Editing ###########
#Downloading entire Web Document (Raw Page Content)
def download_page(url):
version = (3,0)
cur_version = sys.version_info
if cur_version >= version: #If the Current Version of Python is 3.0 or above
import urllib.request #urllib library for Extracting web pages
try:
headers = {}
headers['User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
req = urllib.request.Request(url, headers = headers)
resp = urllib.request.urlopen(req)
respData = str(resp.read())
return respData
except Exception as e:
print(str(e))
else: #If the Current Version of Python is 2.x
import urllib2
try:
headers = {}
headers['User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"
req = urllib2.Request(url, headers = headers)
response = urllib2.urlopen(req)
page = response.read()
return page
except:
return"Page Not found"
#Finding 'Next Image' from the given raw page
def _images_get_next_item(s):
start_line = s.find('rg_di')
if start_line == -1: #If no links are found then give an error!
end_quote = 0
link = "no_links"
return link, end_quote
else:
start_line = s.find('"class="rg_meta"')
start_content = s.find('"ou"',start_line+1)
end_content = s.find(',"ow"',start_content+1)
content_raw = str(s[start_content+6:end_content-1])
return content_raw, end_content
#Getting all links with the help of '_images_get_next_image'
def _images_get_all_items(page):
items = []
while True:
item, end_content = _images_get_next_item(page)
if item == "no_links":
break
else:
items.append(item) #Append all the links in the list named 'Links'
time.sleep(0.1) #Timer could be used to slow down the request for image downloads
page = page[end_content:]
return items
############## Main Program ############
t0 = time.time() #start the timer
#Download Image Links
i= 0
while i<len(search_keyword):
items = []
iteration = "Item no.: " + str(i+1) + " -->" + " Item name = " + str(search_keyword[i])
print (iteration)
print ("Evaluating...")
search_keywords = search_keyword[i]
search = search_keywords.replace(' ','%20')
#make a search keyword directory
try:
os.makedirs(search_keywords)
except OSError, e:
if e.errno != 17:
raise
# time.sleep might help here
pass
j = 0
while j<len(keywords):
pure_keyword = keywords[j].replace(' ','%20')
url = 'https://www.google.com/search?q=' + search + pure_keyword + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg'
raw_html = (download_page(url))
time.sleep(0.1)
items = items + (_images_get_all_items(raw_html))
j = j + 1
#print ("Image Links = "+str(items))
print ("Total Image Links = "+str(len(items)))
print ("\n")
#This allows you to write all the links into a test file. This text file will be created in the same directory as your code. You can comment out the below 3 lines to stop writing the output to the text file.
info = open('output.txt', 'a') #Open the text file called database.txt
info.write(str(i) + ': ' + str(search_keyword[i-1]) + ": " + str(items) + "\n\n\n") #Write the title of the page
info.close() #Close the file
t1 = time.time() #stop the timer
total_time = t1-t0 #Calculating the total time required to crawl, find and download all the links of 60,000 images
print("Total time taken: "+str(total_time)+" Seconds")
print ("Starting Download...")
## To save imges to the same directory
# IN this saving process we are just skipping the URL if there is any error
k=0
errorCount=0
while(k<len(items)):
from urllib2 import Request,urlopen
from urllib2 import URLError, HTTPError
try:
req = Request(items[k], headers={"User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"})
response = urlopen(req,None,15)
output_file = open(search_keywords+"/"+str(k+1)+".jpg",'wb')
data = response.read()
output_file.write(data)
response.close();
print("completed ====> "+str(k+1))
k=k+1;
except IOError: #If there is any IOError
errorCount+=1
print("IOError on image "+str(k+1))
k=k+1;
except HTTPError as e: #If there is any HTTPError
errorCount+=1
print("HTTPError"+str(k))
k=k+1;
except URLError as e:
errorCount+=1
print("URLError "+str(k))
k=k+1;
i = i+1
print("\n")
print("Everything downloaded!")
print("\n"+str(errorCount)+" ----> total Errors")
#----End of the main program ----#
# In[ ]:
wordpress script:
import urllib
from wordpress_xmlrpc import Client, WordPressPost
from wordpress_xmlrpc.methods import posts
import xmlrpclib
from wordpress_xmlrpc.compat import xmlrpc_client
from wordpress_xmlrpc.methods import media, posts
import os
########################### Read Me First ###############################
'''
------------------------------------------In DETAIL--------------------------------
Description
===========
Add new posts to WordPress remotely using Python using XMLRPC library provided by the WordPress.
Installation Requirement
************************
Verify you meet the following requirements
==========================================
Install Python 2.7 (Don't download 3+, as most libraries dont yet support version 3).
Install from PyPI using easy_install python-wordpress-xmlrpc
Easy_Install Link: https://pypi.python.org/pypi/setuptools
==========================================
Windows Installation Guide
==========================
-Download and Install Easy_Install from above Link -Extract Downloaded File and from CMD go to the extracted directory and run 'python setup.py install'. This will install easy_install. -Go to %/python27/script and run following command easy_install python-wordpress-xmlrpc
Ubuntu Installation Guide
=========================
sudo apt-get install python-setuptools
sudo easy_install python-wordpress-xmlrpc
Note: Script has its dummy data to work initially which you can change or integrate with your code easily for making it more dynamic.
****************************************
For Bugs/Suggestions
contact@waqasjamal.com
****************************************
------------------------------------------In DETAIL--------------------------------
'''
class Custom_WP_XMLRPC:
def post_article(self,wpUrl,wpUserName,wpPassword,articleTitle, articleCategories, articleContent, articleTags,PhotoUrl):
self.path=os.getcwd()+"\\00000001.jpg"
self.articlePhotoUrl=PhotoUrl
self.wpUrl=wpUrl
self.wpUserName=wpUserName
self.wpPassword=wpPassword
#Download File
f = open(self.path,'wb')
f.write(urllib.urlopen(self.articlePhotoUrl).read())
f.close()
#Upload to WordPress
client = Client(self.wpUrl,self.wpUserName,self.wpPassword)
filename = self.path
# prepare metadata
data = {'name': 'picture.jpg','type': 'image/jpg',}
# read the binary file and let the XMLRPC library encode it into base64
with open(filename, 'rb') as img:
data['bits'] = xmlrpc_client.Binary(img.read())
response = client.call(media.UploadFile(data))
attachment_id = response['id']
#Post
post = WordPressPost()
post.title = articleTitle
post.content = articleContent
post.terms_names = { 'post_tag': articleTags,'category': articleCategories}
post.post_status = 'publish'
post.thumbnail = attachment_id
post.id = client.call(posts.NewPost(post))
print 'Post Successfully posted. Its Id is: ',post.id
#########################################
# POST & Wp Credentials Detail #
#########################################
#Url of Image on the internet
ariclePhotoUrl='http://i1.tribune.com.pk/wp-content/uploads/2013/07/584065-twitter-1375197036-960-640x480.jpg'
# Dont forget the /xmlrpc.php cause thats your posting adress for XML Server
wpUrl='http://YourWebSite.com/xmlrpc.php'
#WordPress Username
wpUserName='WordPressUsername'
#WordPress Password
wpPassword='YourWordPressPassword'
#Post Title
articleTitle='Testing Python Script version 3'
#Post Body/Description
articleContent='Final .... Testing Fully Automated'
#list of tags
articleTags=['code','python']
#list of Categories
articleCategories=['language','art']
#########################################
# Creating Class object & calling the xml rpc custom post Function
#########################################
xmlrpc_object = Custom_WP_XMLRPC()
#On Post submission this function will print the post id
xmlrpc_object.post_article(wpUrl,wpUserName,wpPassword,articleTitle, articleCategories, articleContent, articleTags,ariclePhotoUrl)
for exemple :
search_keyword = ['Australia']
donwload a 20 image frome Google image and update to wordpress like a new posts
#########################################
# POST & Wp Credentials Detail #
#########################################
#Url of Image on the internet
ariclePhotoUrl='https://www.google.com/search?q=' search_keyword +'&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg'
# Dont forget the /xmlrpc.php cause thats your posting adress for XML Server
wpUrl='http://YourWebSite.com/xmlrpc.php'
#WordPress Username
wpUserName='WordPressUsername'
#WordPress Password
wpPassword='YourWordPressPassword'
#Post Title
articleTitle='Testing Python Script version 3'
#Post Body/Description
articleContent='Final .... Testing Fully Automated'
#list of tags
articleTags=['code','python']
#list of Categories
articleCategories=['language','art']
#########################################
# Creating Class object & calling the xml rpc custom post Function
#########################################
xmlrpc_object = Custom_WP_XMLRPC()
#On Post submission this function will print the post id
xmlrpc_object.post_article(wpUrl,wpUserName,wpPassword,articleTitle, articleCategories, articleContent, articleTags,ariclePhotoUrl)