#!/usr/bin/env python
import cgi
import cgitb
import codecs
import sys
import urllib
import xml.dom.minidom
from urllib2 import Request, urlopen, URLError

"""
This program demonstrates the XML Results parsing for Google Site Search using 
cgi python.  You must edit the 'your_cx_number' parameter below and deploy on a 
webserver supporting python cgi (tested on apache 2+linux). Send in a query ('q=') and a parameter 
indicating the result set to retrieve (&start=) to get XML Results from GSS.  The &start=
parameter isn't implemented in this script, but feel free to add it in and query /cse.
The XML results are minimally parsed and displayed to the user. 
see
http://www.google.com/cse/docs/resultsxml.html
"""



sys.stdout = codecs.getwriter('utf-8')(sys.stdout)

cgitb.enable()

print "Content-Type: text/html"
print
print '<html><head><title>GSS XML in python CGI </title></head>'
print '<body>'
print '<form name=query id=query method=POST action=gss.py>'
print '<input type="text" id="q" name="q" /><br/>'
print '<input type="submit" value="submit"/>'
print '<br/>'

your_cx_number = '008339188426815167219:z87xrys-eed'

form = None
form = cgi.FieldStorage()

q = None

try:
  q = None
  q = form['q'].value
except KeyError:
  print 'No query term' 


if q is not None:
  u = 'http://www.google.com/cse?cx=%s&client=google-csbe&output=xml_no_dtd&q=%s' % (your_cx_number,q)
  req = Request(u);
  try:
    response = urlopen(req);
    content = response.read();

    xmldoc = xml.dom.minidom.parseString(content)
    
    print '<h3>Query: %s </h3><br/>' %q
    
    # Estimated Results is inaccurate; do not use
    m_node = xmldoc.getElementsByTagName('M')
    for nodes in m_node:
      if nodes.nodeName == 'M':
        print '<li>Estimated number of Results : %s <br/>' % nodes.childNodes[0].nodeValue

    r_nodes = xmldoc.getElementsByTagName('R')
    for rnode in r_nodes:
      title = ''
      link = ''
      snippet = ''
      rchild = rnode.childNodes
      for nodes in rchild:
        if nodes.nodeName == 'U':
          link = nodes.childNodes[0].nodeValue
        if nodes.nodeName == 'T':
          title = nodes.childNodes[0].nodeValue
        if nodes.nodeName == 'S':
          snippet = nodes.childNodes[0].nodeValue
      print '<p> <a href="%s">%s</a><br/><i>%s</i></p>' % (link, title, snippet)

  except URLError, e:
    if hasattr(e, 'reason'):
      print 'Error connecting to %s for %s: '% u, e.reason
    elif hasattr(e, 'code'):
      print 'Error code: $s for URL %s' % e.code, u
      
print '</body> </html>'



