All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Aníbal Limón" <anibal.limon@linux.intel.com>
To: openembedded-core@lists.openembedded.org
Subject: [PATCHv2] oe/distro_check.py: Fixes for python3
Date: Fri, 10 Jun 2016 10:12:10 -0500	[thread overview]
Message-ID: <1465571530-32298-1-git-send-email-anibal.limon@linux.intel.com> (raw)

create_socket: Use urllib because urllib2 is now part of urllib in
Python 3, and passing proxies as an argument is deprecated, so export
them to the environment instead.

get_links_from_url: Stop using sgmllib for parsing HTML because it is
deprecated in Python 3; use bs4 instead, which is already imported
in the bitbake tree.

[YOCTO #9744]

Signed-off-by: Aníbal Limón <anibal.limon@linux.intel.com>
---
 meta/lib/oe/distro_check.py | 82 +++++++++++++++++++--------------------------
 1 file changed, 34 insertions(+), 48 deletions(-)

diff --git a/meta/lib/oe/distro_check.py b/meta/lib/oe/distro_check.py
index f1f1fbb..87c52fa 100644
--- a/meta/lib/oe/distro_check.py
+++ b/meta/lib/oe/distro_check.py
@@ -1,53 +1,35 @@
 from contextlib import contextmanager
-@contextmanager
+
+from bb.utils import export_proxies
+
 def create_socket(url, d):
-    import urllib.request, urllib.parse, urllib.error
-    socket = urllib.request.urlopen(url, proxies=get_proxies(d))
+    import urllib
+
+    socket = None
     try:
-        yield socket
-    finally:
-        socket.close()
+        export_proxies(d)
+        socket = urllib.request.urlopen(url)
+    except:
+        bb.warn("distro_check: create_socket url %s can't access" % url)
 
-def get_proxies(d):
-    proxies = {}
-    for key in ['http', 'https', 'ftp', 'ftps', 'no', 'all']:
-        proxy = d.getVar(key + '_proxy', True)
-        if proxy:
-            proxies[key] = proxy
-    return proxies
+    return socket
 
 def get_links_from_url(url, d):
     "Return all the href links found on the web location"
 
-    import sgmllib
-    
-    class LinksParser(sgmllib.SGMLParser):
-        def parse(self, s):
-            "Parse the given string 's'."
-            self.feed(s)
-            self.close()
-    
-        def __init__(self, verbose=0):
-            "Initialise an object passing 'verbose' to the superclass."
-            sgmllib.SGMLParser.__init__(self, verbose)
-            self.hyperlinks = []
-    
-        def start_a(self, attributes):
-            "Process a hyperlink and its 'attributes'."
-            for name, value in attributes:
-                if name == "href":
-                    self.hyperlinks.append(value.strip('/'))
-    
-        def get_hyperlinks(self):
-            "Return the list of hyperlinks."
-            return self.hyperlinks
+    from bs4 import BeautifulSoup, SoupStrainer
 
-    with create_socket(url,d) as sock:
+    hyperlinks = []
+
+    webpage = ''
+    sock = create_socket(url,d)
+    if sock:
         webpage = sock.read()
 
-    linksparser = LinksParser()
-    linksparser.parse(webpage)
-    return linksparser.get_hyperlinks()
+    soup = BeautifulSoup(webpage, "html.parser", parse_only=SoupStrainer("a"))
+    for line in soup.find_all('a', href=True):
+        hyperlinks.append(line['href'].strip('/'))
+    return hyperlinks
 
 def find_latest_numeric_release(url, d):
     "Find the latest listed numeric release on the given url"
@@ -162,14 +144,18 @@ def find_latest_debian_release(url, d):
 
 def get_debian_style_source_package_list(url, section, d):
     "Return the list of package-names stored in the debian style Sources.gz file"
-    with create_socket(url,d) as sock:
-        webpage = sock.read()
-        import tempfile
-        tmpfile = tempfile.NamedTemporaryFile(mode='wb', prefix='oecore.', suffix='.tmp', delete=False)
-        tmpfilename=tmpfile.name
-        tmpfile.write(sock.read())
-        tmpfile.close()
+    import tempfile
     import gzip
+
+    webpage = ''
+    sock = create_socket(url,d)
+    if sock:
+        webpage = sock.read()
+
+    tmpfile = tempfile.NamedTemporaryFile(mode='wb', prefix='oecore.', suffix='.tmp', delete=False)
+    tmpfilename=tmpfile.name
+    tmpfile.write(sock.read())
+    tmpfile.close()
     bb.note("Reading %s: %s" % (url, section))
 
     f = gzip.open(tmpfilename)
@@ -266,9 +252,9 @@ def update_distro_data(distro_check_dir, datetime, d):
     import fcntl
     try:
         if not os.path.exists(datetime_file):
-            open(datetime_file, 'w+b').close() # touch the file so that the next open won't fail
+            open(datetime_file, 'w+').close() # touch the file so that the next open won't fail
 
-        f = open(datetime_file, "r+b")
+        f = open(datetime_file, "r+")
         fcntl.lockf(f, fcntl.LOCK_EX)
         saved_datetime = f.read()
         if saved_datetime[0:8] != datetime[0:8]:
-- 
2.1.4



                 reply	other threads:[~2016-06-10 15:11 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1465571530-32298-1-git-send-email-anibal.limon@linux.intel.com \
    --to=anibal.limon@linux.intel.com \
    --cc=openembedded-core@lists.openembedded.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.