Home > Linux, Python > Scaricare l’ultima versione di un certo programma

Scaricare l’ultima versione di un certo programma

Vi è mai capitato di dover (o voler) scaricare l’ultima versione di uno o più programmi? O sapere semplicemente se è stato rilasciato un aggiornamento?
Bene, a Frafra queste cose capitano, soprattutto quando si parla di sorgenti, compilazione, e compagnia cantante :)

Ho creato un programma Python (>= 3.x, testato su 3.1.1), che risolve automaticamente questo problema, con l’uso di un parser e di un crawler :) Sono cento linee giuste giuste, parzialmente commentate, con tanto di licenza (questa volta metto il file per intero, perché in primo luogo ho raggiunto un numero di linee tondo tondo, e in secondo luogo perché la prima linea è molto importante).

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# takeit.py
#
# Copyright 2009 Francesco Frassinelli <fraph24@gmail.com>
#
#     This program is free software: you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation, either version 3 of the License, or
#     (at your option) any later version.
#    
#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#     GNU General Public License for more details.
#    
#     You should have received a copy of the GNU General Public License
#     along with this program. If not, see <http://www.gnu.org/licenses/>.
 
""" This program shows you the latest version of a source package """
 
### HTMLParser patched version, see: http://bugs.python.org/issue755660
# $ cp /usr/local/lib/python3.1/html/parser.py HTMLParser.py
# $ wget http://bugs.python.org/file13041/htmlparser_error.diff
# $ patch -p0 HTMLParser.py < htmlparser_error.diff
import HTMLParser
import re, sys, urllib.request
 
# Known download servers: short name -> base URL where crawling starts.
# Add your own entries here (and please test them before reporting).
servers = dict(
    gnu="http://ftp.gnu.org/gnu/",
    linux="http://ftp.kernel.org/pub/linux/",
    vim="http://ftp.vim.org/pub/vim/unix/",
)
 
class Parser(HTMLParser.HTMLParser):
    """HTML parser that reports the target of every anchor tag.

    For each ``<a>`` start tag carrying an ``href`` attribute, ``callback``
    is invoked as ``callback(page, href)``.
    """

    def __init__(self, page, callback):
        super().__init__()
        self.page = page # URL of the page being parsed, passed back to callback
        self.callback = callback

    def handle_starttag(self, tag, attrs):
        """Forward the href of an anchor start tag to the callback."""
        if tag != "a":
            return # Only anchors are interesting
        attributes = dict(attrs)
        if "href" in attributes:
            self.callback(self.page, attributes["href"])

    def error(self, message):
        """Replace the base-class error hook with a no-op so errors are skipped."""
        return None
 
class Crawler:
    """Walk an HTTP directory listing recursively and collect source tarballs.

    Every link whose name looks like ``<package>-<version>.tar.gz`` or
    ``<package>-<version>.tar.bz2`` is recorded in ``self.packages`` as
    ``{package: [(version, url), ...]}``. ``results()`` then yields the
    newest version found for each package.
    """

    def __init__(self):
        self.source = re.compile(r"""^
            (.+?)-                  # package name
            ([\d\.-]+)              # version
            \.(tar\.bz2|tar\.gz)    # compression
            $""", re.VERBOSE) # VERBOSE is for multiline commented regex
        self.packages = dict() # package name -> list of (version, url)

    def inspect(self, page):
        """Download *page* and hand every link it contains to ``callback``.

        The body is decoded as UTF-8 with undecodable bytes ignored.
        """
        print("I'm in:", page) # Always shown, so a failure can be traced to its page
        with urllib.request.urlopen(page) as data:
            parser = Parser(page, self.callback)
            parser.feed(data.read().decode("utf_8", "ignore"))
            parser.close()

    def item(self, page, link):
        """Record *link* (a file name found on *page*) if it is a source tarball."""
        # The pattern is anchored with ^, so a single match() is sufficient.
        res = self.source.match(link)
        if res is None:
            return
        package, version = res.group(1, 2)
        if package == "0": # Should we use re.match("^\d+$", package)?
            return # Not properly detected: regex could be wrong
        self.packages.setdefault(package, []).append((version, page + link))

    def callback(self, page, link):
        """Dispatch a link found on *page*: recurse into directories, record files.

        Skipped links: empty or valueless hrefs, special links (containing
        ``?``), backwards links (containing ``..``), absolute paths (leading
        ``/``) and external links or deeper paths (more than one ``/``).
        """
        if not link:
            return # <a href> without a value yields None; nothing to follow
        useless = ("?" in link or ".." in link
                   or link.startswith("/") # Absolute path, e.g. "/" for the site root
                   or link.count("/") > 1)
        if useless:
            return
        if link.endswith("/"):
            self.inspect(page + link) # Directory: recursive function
        else:
            self.item(page, link) # File: analyze its name

    @staticmethod
    def _version_key(version):
        """Return the numeric components of *version* as a tuple of ints.

        Comparing these tuples orders "1.10" after "1.9", which a plain
        string comparison gets wrong.
        """
        return tuple(int(number) for number in re.findall(r"\d+", version))

    def results(self):
        """Yield ``(package, version, url)`` for the newest version of each package.

        Packages are yielded in alphabetical order. Versions are compared
        numerically; ties fall back to comparing the (version, url) pair.
        """
        for package, found in sorted(self.packages.items()):
            version, url = max(
                found,
                key=lambda entry: (self._version_key(entry[0]), entry),
            )
            yield package, version, url # Just to iterate it
 
def mycrawler(base, directory = ""):
    """Crawl ``base + directory`` and print the newest version of each package found."""
    spider = Crawler()
    spider.inspect(base + directory)
    for name, latest, location in spider.results():
        # The leading "\n" separates each package report with an empty line
        print("\nName:", name)
        print(" -> Version:", latest)
        print(" -> URL:", location)
 
if __name__ == "__main__":
    # Usage: takeit.py [server [directory ...]]
    # With no server name every known server is crawled; with no directory
    # the crawl starts from the server's base URL.
    cli = sys.argv[1:]
    if cli:
        base = (servers[cli[0]],)
    else:
        base = servers.values()
    if len(cli) > 1:
        directory = set(cli[1:]) # A set, so repeated directories are crawled once
    else:
        directory = ("",)
    for server in base:
        for page in directory:
            mycrawler(server, page)

frafra Linux, Python

  1. Nessun commento ancora...
  1. Nessun trackback ancora...