#!/bin/bash
# megaupbash version 0.3
# gestor de descargas de megaupload
# Bash script
#
# Escrito por _Poseidon_, comenzado el martes 26 de agosto de 2008, partiendo de la base de rapibash
# Última modificación Martes 14 de abril de 2009
# Webpage http://pablo777.wordpress.com/
# Entry point: loads the settings, then either opens a GUI (no arguments) or
# dispatches the sub-command given in $1.
# Arguments:
#   $1 - optional sub-command: start | stop | clean | move | add | z | x
#   $2.. - sub-command arguments (move: FROM TO; add: text containing URIs)
# Returns: 1 when no GUI tool is available, 2 on an unknown sub-command,
#          otherwise the status of the dispatched function.
function main {
  settings
  if [ "$#" -eq 0 ]; then
    # No sub-command: open the first GUI front-end that is installed.
    if command -v zenity >"/dev/null" 2>&1; then
      zenity-gui
    elif command -v Xdialog >"/dev/null" 2>&1; then
      Xdialog-gui
    else
      echo "Error: neither zenity nor Xdialog is installed" >&2
      return 1
    fi
    return
  fi
  case "$1" in
    start)
      PROCESS="$(ps ax -o pid,args)"
      # This process is part of the listing as well, so more than one match
      # means another "start" instance is already running. -F keeps the
      # script path from being interpreted as a regular expression.
      if [ "$(printf '%s\n' "$PROCESS" | grep -cF -- "$0 start")" -gt 1 ]; then
        echo "$ERR5"
      else
        start_download
      fi
      ;;
    stop) stop_download ;;
    clean) clean_list ;;
    move) move_URI "$2" "$3" ;;
    # The literal word "add" is passed along too; add_URIs only keeps tokens
    # that look like megaupload URIs.
    add) add_URIs "$@" ;;
    z) zenity-gui ;;
    x) Xdialog-gui ;;
    *)
      echo "Usage: $0 [start|stop|clean|move FROM TO|add URI...|z|x]" >&2
      return 2
      ;;
  esac
}
# Removes every line that is exactly equal to $1 from the download list.
# The comparison is a fixed-string, whole-line match (grep -Fx), so characters
# such as "." or "?" inside a URI are not treated as regex syntax.
function _remove_from_list {
  local remaining
  remaining="$(grep -vxF -- "$1" "$LIST_FILE")"
  if [ -n "$remaining" ]; then
    printf '%s\n' "$remaining" >"$LIST_FILE"
  else
    printf '' >"$LIST_FILE"
  fi
}

# Processes the download list from the top until it is empty: fetches the
# page of the first URI, solves the captcha with the extracted Python helper,
# posts it, and downloads the resulting link into DOWNLOAD_DIR. A URI is
# removed from the list once it is downloaded or reported as deleted; any
# other failure leaves it in place so the next iteration retries it.
# Progress messages go to stdout (the GUIs redirect it into LOG_FILE).
# Returns: 1 when a required file or directory is not writable, else 0.
function start_download {
  settings
  if [ ! -w "$LIST_FILE" ] || [ ! -w "$LOG_FILE" ] || [ ! -w "$DOWNLOAD_STATUS_FILE" ] || [ ! -w "$DOWNLOAD_DIR" ]; then
    echo "$ERR6"
    return 1
  fi

  local each_URI HTML_FILE1 HTML_FILE2 CAPTCHACODE MEGAVAR GENCAP CAPTCHA
  local DOWNLOADLINK captcha_img

  while [ "$(grep -c '' "$LIST_FILE")" -gt 0 ]; do
    each_URI="$(head -n 1 "$LIST_FILE")"
    if [ -z "$each_URI" ]; then
      # A blank line can never be downloaded; drop blank lines instead of
      # retrying them forever.
      _remove_from_list ""
      continue
    fi
    printf '%s\n' "$each_URI" >"$DOWNLOAD_STATUS_FILE"
    echo "$MSG1 $each_URI"

    if ! HTML_FILE1="$(wget -q -O - "$each_URI")"; then
      # Report the failure and pause briefly so an unreachable server does
      # not turn the retry loop into a silent busy loop.
      echo "$ERR0 $each_URI"
      sleep 5
      continue
    fi

    if printf '%s\n' "$HTML_FILE1" | grep -q "Unfortunately, the link you have clicked is not available."; then
      echo "$ERR2"
      _remove_from_list "$each_URI"
      continue
    fi

    # Scrape the hidden form fields and the captcha image address.
    CAPTCHACODE="$(printf '%s\n' "$HTML_FILE1" | grep 'name="captchacode"' | sed -e 's/^.*value="//' -e 's/".*$//')"
    MEGAVAR="$(printf '%s\n' "$HTML_FILE1" | grep 'name="megavar"' | sed -e 's/^.*value="//' -e 's/".*$//')"
    GENCAP="$(printf '%s\n' "$HTML_FILE1" | grep "gencap" | sed -e 's/^.*src="//' -e 's/".*$//')"

    # Use a private temporary file rather than a fixed, predictable /tmp name.
    if ! captcha_img="$(mktemp)"; then
      echo "$ERR7"
      return 1
    fi
    wget -q -O "$captcha_img" "$GENCAP"
    CAPTCHA="$(python "/tmp/megaupload_captcha.py" "$captcha_img")"
    rm -f -- "$captcha_img"

    if [ "${#CAPTCHA}" -ne 4 ]; then
      echo "$ERR7"
      continue
    fi

    if ! HTML_FILE2="$(wget -q --post-data "captchacode=$CAPTCHACODE&megavar=$MEGAVAR&captcha=$CAPTCHA" "$each_URI" -O -)"; then
      echo "$ERR1 $HTML_FILE2"
      continue
    fi

    DOWNLOADLINK="$(printf '%s\n' "$HTML_FILE2" | grep 'id="downloadlink"' | sed -e 's/^.*href="//' -e 's/".*$//')"
    # Replace numeric HTML entities (&#NNN;) with "_".
    DOWNLOADLINK="$(printf '%s\n' "$DOWNLOADLINK" | sed -e 's/\&\#[0-9]*\;/_/g')"
    if [ -z "$DOWNLOADLINK" ]; then
      echo "$ERR8"
      continue
    fi

    echo "$MSG4 $each_URI"
    # -P stores the file under DOWNLOAD_DIR without changing this shell's
    # working directory, so relative list/log/status paths keep resolving.
    if wget -a "$DOWNLOAD_STATUS_FILE" -P "$DOWNLOAD_DIR" "$DOWNLOADLINK"; then
      echo "$MSG5"
      _remove_from_list "$each_URI"
    else
      echo "$ERR1 $each_URI $MSG6"
    fi
  done
  return 0
}
# Stops a running download: signals the background "start" instance of this
# script and the wget process fetching from megaupload, using the process
# snapshot that set_status stores in PROCESS.
# Does nothing when set_status reports that no download is running.
function stop_download {
  set_status
  [ "$DOWNLOADING" = "yes" ] || return 0

  local pids
  # -F: the script path is matched literally, not as a regular expression.
  pids="$(printf '%s\n' "$PROCESS" | grep -F -- "$0 start" | awk '{print $1}')"
  if [ -n "$pids" ]; then
    # Unquoted on purpose: one PID per word. Skipped when nothing matched so
    # kill is never invoked without arguments.
    kill $pids
  fi

  pids="$(printf '%s\n' "$PROCESS" | grep -E 'wget.*http://.+\.megaupload\.com/files/' | awk '{print $1}')"
  if [ -n "$pids" ]; then
    kill $pids
  fi
  return 0
}
# Empties the download list file.
# Returns: 0 when the file could be truncated, 1 otherwise.
function clean_list {
  printf '' >"$LIST_FILE" || return 1
  return 0
}
# Extracts megaupload URIs from arbitrary text and appends them to the
# download list.
# Arguments: any number of strings; they are joined and split on every
#            character that is not alphanumeric or one of ":/=?.", and only
#            tokens matching http://(www.)megaupload.com/ are kept.
# The resulting list keeps its order, contains each URI once (duplicates are
# removed even when they are not adjacent) and never contains blank lines,
# which the downloader would otherwise try to fetch.
function add_URIs {
  local existing merged
  existing="$(cat "$LIST_FILE" 2>/dev/null)"
  merged="$(
    {
      printf '%s\n' "$existing"
      printf '%s\n' "$*" \
        | tr -sc '[:alnum:]:/=?.' '\n' \
        | grep -E 'http://(www\.)?megaupload\.com/'
    } | awk 'NF && !seen[$0]++'
  )"
  if [ -n "$merged" ]; then
    printf '%s\n' "$merged" >"$LIST_FILE"
  else
    printf '' >"$LIST_FILE"
  fi
}
# Moves one entry of the download list to a new position.
# Arguments:
#   $1 - FROM: 1-based line number of the entry to move
#   $2 - TO:   line number (in the current list) after which the entry is
#              placed; 0 moves it to the beginning
# Returns: 0 on success or when FROM equals TO (nothing to do); 1 when an
#          argument is not a number, is out of range, or the list cannot be
#          read or written.
function move_URI {
  FROM="$1"
  TO="$2"
  [ "$FROM" != "$TO" ] || return 0

  # Accept plain decimal numbers only.
  case "$FROM" in '' | *[!0-9]*) return 1 ;; esac
  case "$TO" in '' | *[!0-9]*) return 1 ;; esac

  local total new_list
  total="$(awk 'END { print NR }' <"$LIST_FILE")" || return 1
  if [ "$FROM" -lt 1 ] || [ "$FROM" -gt "$total" ] || [ "$TO" -gt "$total" ]; then
    return 1
  fi

  # Re-emit the list by line number: only the selected line moves, even if
  # other lines have the same text, and no blank line is introduced when the
  # list has a single entry.
  new_list="$(awk -v from="$FROM" -v to="$TO" '
    { line[NR] = $0 }
    END {
      if (to == 0) print line[from]
      for (i = 1; i <= NR; i++) {
        if (i != from) print line[i]
        if (i == to) print line[from]
      }
    }' <"$LIST_FILE")" || return 1

  printf '%s\n' "$new_list" >"$LIST_FILE" || return 1
  return 0
}
# Zenity front-end. Shows the main menu, runs the chosen action and shows the
# menu again until the dialog is cancelled or closed.
# Reads the status variables published by set_status and the labels defined
# by settings; list changes are delegated to add_URIs, move_URI and
# clean_list.
function zenity-gui {
  local startorstop startorstoplabel status_text progress progress_kb
  local OPTION OPTION2 URIS URI WEB_TEXT DELETE FROM TO A D F P
  local line count remaining j
  local -a uris rows numbered selected

  # A loop replaces the former self-recursion, so a long session does not
  # keep nesting function calls.
  while true; do
    set_status
    if [ "$DOWNLOADING" = "yes" ]; then
      startorstop="stop"
      startorstoplabel="$ZG16"
    else
      startorstop="start"
      startorstoplabel="$ZG15"
    fi

    if [ "$WGET_RUNNING" = "yes" ]; then
      # Show the progress in M once it exceeds 1024K; anything that is not
      # a plain number after removing "K" is displayed unchanged.
      progress="$DOWNLOAD_PROGRESS"
      progress_kb="${DOWNLOAD_PROGRESS//K/}"
      case "$progress_kb" in
        '' | *[!0-9]*) ;;
        *)
          if [ "$((10#$progress_kb))" -gt 1024 ]; then
            progress="$((10#$progress_kb / 1024))M"
          fi
          ;;
      esac
      status_text="$DOING $progress/$DOWNLOAD_SIZE_H ($DOWNLOAD_PERCENTAGE)"$'\n'"$ZG19 $DOWNLOAD_TIME_LEFT $ZG20 $DOWNLOAD_SPEED"
    else
      status_text="$DOING"
    fi

    OPTION="$(zenity --list --hide-column=1 --column "" --column "" \
      --text="$status_text" --title="$ZG1" --width 600 --height 410 \
      actualizar "$ZG2" "$startorstop" "$startorstoplabel" \
      view_list "$ZG3" add_uri "$ZG4" delete_uri "$ZG5" move_uri "$ZG6" \
      clean_list "$ZG7" add_webpage "$ZG8" add_file "$ZG9" \
      configure "$ZG22" view_log "$ZG21")" || return 0

    case "$OPTION" in
      view_list)
        # Read the list line by line into an array: URIs contain "?", which
        # an unquoted expansion would treat as a glob character.
        uris=()
        while IFS= read -r line || [ -n "$line" ]; do
          [ -n "$line" ] && uris+=("$line")
        done <"$LIST_FILE"
        zenity --list --column "" --title "$ZG3" --text="$DOING" --width 600 --height 410 "${uris[@]}"
        ;;
      add_uri)
        if URIS="$(zenity --entry --text="$ZG11" --title="$ZG4" --width 600)"; then
          add_URIs "$URIS"
        fi
        ;;
      delete_uri)
        rows=()
        while IFS= read -r line || [ -n "$line" ]; do
          [ -n "$line" ] && rows+=(false "$line")
        done <"$LIST_FILE"
        if DELETE="$(zenity --list --column "" --column "$ZG5" --checklist --separator " " --title "$ZG5" --text="$ZG5" --width 600 --height 410 "${rows[@]}")"; then
          selected=()
          read -r -a selected <<<"$DELETE"
          if [ "${#selected[@]}" -gt 0 ]; then
            remaining="$(cat "$LIST_FILE")"
            for j in "${selected[@]}"; do
              # Fixed-string, whole-line match: only the chosen URI goes.
              remaining="$(printf '%s\n' "$remaining" | grep -vxF -- "$j")"
            done
            if [ -n "$remaining" ]; then
              printf '%s\n' "$remaining" >"$LIST_FILE"
            else
              printf '' >"$LIST_FILE"
            fi
          fi
        fi
        ;;
      clean_list)
        clean_list
        ;;
      move_uri)
        # Pair every entry with its line number; move_URI works on line
        # numbers of the list file.
        numbered=()
        count=0
        while IFS= read -r line || [ -n "$line" ]; do
          count="$((count + 1))"
          [ -n "$line" ] && numbered+=("$count" "$line")
        done <"$LIST_FILE"
        if FROM="$(zenity --list --column "" --column "" --title "$ZG6" --width 600 --height 410 --text "$ZG13" "${numbered[@]}")" \
          && TO="$(zenity --list --column "" --column "" --title "$ZG6" --width 600 --height 410 --text "$ZG14" 0 "$ZG12" "${numbered[@]}")"; then
          move_URI "$FROM" "$TO"
        fi
        ;;
      stop)
        stop_download
        ;;
      start)
        # Run the downloader detached; its output becomes the log.
        nohup "$0" "start" >>"$LOG_FILE" &
        sleep 1
        ;;
      add_file)
        # An "open" dialog: the chosen file is only read.
        if A="$(zenity --file-selection --title="$ZG9")" && [ -r "$A" ]; then
          add_URIs "$(cat "$A")"
        fi
        ;;
      add_webpage)
        if URI="$(zenity --entry --text="$ZG8" --title="$ZG8" --width 600)" && WEB_TEXT="$(wget -O - "$URI")"; then
          add_URIs "$WEB_TEXT"
        fi
        ;;
      view_log)
        zenity --text="$ZG21" --title="$ZG21" --width 600 --height 410 --text-info <"$LOG_FILE"
        ;;
      configure)
        settings
        while OPTION2="$(zenity --title "$ZG22" --hide-column=1 --column "" --column "" --text "$ZG22" --width 600 --height 410 --list \
          download_dir "$ZG24 [$DOWNLOAD_DIR]" list_file "$ZG25 [$LIST_FILE]" \
          download_status_file "$ZG26 [$DOWNLOAD_STATUS_FILE]" \
          log_file "$ZG27 [$LOG_FILE]" permissions "$ZG28 [$PERMISSIONS]")"; do
          case "$OPTION2" in
            download_dir)
              if D="$(zenity --title "$ZG24" --file-selection --directory)"; then
                change DOWNLOAD_DIR "$D"
              fi
              ;;
            list_file)
              if F="$(zenity --title "$ZG25" --file-selection --save)"; then
                change LIST_FILE "$F"
              fi
              ;;
            download_status_file)
              if F="$(zenity --title "$ZG26" --file-selection --save)"; then
                change DOWNLOAD_STATUS_FILE "$F"
              fi
              ;;
            log_file)
              if F="$(zenity --title "$ZG27" --file-selection --save)"; then
                change LOG_FILE "$F"
              fi
              ;;
            permissions)
              if P="$(zenity --title "$ZG28" --text "$ZG29" --entry)"; then
                change PERMISSIONS "$P"
              fi
              ;;
          esac
          # Reload so the next menu shows the values just written.
          settings
        done
        ;;
    esac
  done
}
### COMENTARIO IMPORTANTE: TENÍA INTENCIONES DE HACER UN GUI CON KDIALOG, PERO ME DI CUENTA QUE KDIALOG ES ASQUEROSO, POR ESO NO LO HICE.
# Xdialog front-end. Shows the main menu, runs the chosen action and shows
# the menu again until the dialog is cancelled or closed.
# Reads the status variables published by set_status and the labels defined
# by settings; list changes are delegated to add_URIs, move_URI and
# clean_list.
function Xdialog-gui {
  local startorstop startorstoplabel status_text
  local OPTION OPTION2 URIS URI WEB_TEXT DELETE FROM TO A D F P
  local line count remaining j
  local -a rows numbered selected

  # A loop replaces the former self-recursion, so a long session does not
  # keep nesting function calls.
  while true; do
    set_status
    if [ "$DOWNLOADING" = "yes" ]; then
      startorstop="stop"
      startorstoplabel="$ZG16"
    else
      startorstop="start"
      startorstoplabel="$ZG15"
    fi

    if [ "$WGET_RUNNING" = "yes" ]; then
      status_text="$DOING $DOWNLOAD_PROGRESS/$DOWNLOAD_SIZE_H ($DOWNLOAD_PERCENTAGE)"$'\n'"$ZG19 $DOWNLOAD_TIME_LEFT $ZG20 $DOWNLOAD_SPEED"
    else
      status_text="$DOING"
    fi

    OPTION="$(Xdialog --stdout --title="$ZG0" --cancel-label "$ZG23" --no-tags \
      --menubox "$status_text" 20 55 0 \
      actualizar "$ZG2" "$startorstop" "$startorstoplabel" \
      view_list "$ZG3" add_uri "$ZG4" delete_uri "$ZG5" move_uri "$ZG6" \
      clean_list "$ZG7" add_webpage "$ZG8" add_file "$ZG9" \
      configure "$ZG22" view_log "$ZG21")" || return 0

    case "$OPTION" in
      view_list)
        Xdialog --title "$ZG3" --no-cancel --textbox "$LIST_FILE" 20 55
        ;;
      add_uri)
        if URIS="$(Xdialog --stdout --title="$ZG4" --inputbox "$ZG11" 0 0)"; then
          add_URIs "$URIS"
        fi
        ;;
      delete_uri)
        # Read the list line by line into an array: URIs contain "?", which
        # an unquoted expansion would treat as a glob character.
        rows=()
        while IFS= read -r line || [ -n "$line" ]; do
          [ -n "$line" ] && rows+=("$line" "$line" off)
        done <"$LIST_FILE"
        if DELETE="$(Xdialog --stdout --separator " " --no-tags --title "$ZG5" --checklist "$ZG5" 20 55 0 "${rows[@]}")"; then
          selected=()
          read -r -a selected <<<"$DELETE"
          if [ "${#selected[@]}" -gt 0 ]; then
            remaining="$(cat "$LIST_FILE")"
            for j in "${selected[@]}"; do
              # Fixed-string, whole-line match: only the chosen URI goes.
              remaining="$(printf '%s\n' "$remaining" | grep -vxF -- "$j")"
            done
            if [ -n "$remaining" ]; then
              printf '%s\n' "$remaining" >"$LIST_FILE"
            else
              printf '' >"$LIST_FILE"
            fi
          fi
        fi
        ;;
      clean_list)
        clean_list
        ;;
      move_uri)
        # Pair every entry with its line number; move_URI works on line
        # numbers of the list file.
        numbered=()
        count=0
        while IFS= read -r line || [ -n "$line" ]; do
          count="$((count + 1))"
          [ -n "$line" ] && numbered+=("$count" "$line")
        done <"$LIST_FILE"
        if FROM="$(Xdialog --stdout --title "$ZG6" --menubox "$ZG13" 20 55 0 "${numbered[@]}")" \
          && TO="$(Xdialog --stdout --title "$ZG6" --menubox "$ZG14" 20 55 0 0 "$ZG12" "${numbered[@]}")"; then
          move_URI "$FROM" "$TO"
        fi
        ;;
      stop)
        stop_download
        ;;
      start)
        # Run the downloader detached; its output becomes the log.
        nohup "$0" "start" >>"$LOG_FILE" &
        sleep 1
        ;;
      add_file)
        if A="$(Xdialog --stdout --title="$ZG9" --fselect "*" 0 0)" && [ -r "$A" ]; then
          add_URIs "$(cat "$A")"
        fi
        ;;
      add_webpage)
        if URI="$(Xdialog --stdout --title="$ZG8" --inputbox "$ZG8" 0 0)" && WEB_TEXT="$(wget -O - "$URI")"; then
          add_URIs "$WEB_TEXT"
        fi
        ;;
      view_log)
        Xdialog --title "$ZG21" --no-cancel --textbox "$LOG_FILE" 20 55
        ;;
      configure)
        settings
        while OPTION2="$(Xdialog --stdout --title "$ZG22" --no-tags --menubox "$ZG22" 20 55 0 \
          download_dir "$ZG24 [$DOWNLOAD_DIR]" list_file "$ZG25 [$LIST_FILE]" \
          download_status_file "$ZG26 [$DOWNLOAD_STATUS_FILE]" \
          log_file "$ZG27 [$LOG_FILE]" permissions "$ZG28 [$PERMISSIONS]")"; do
          case "$OPTION2" in
            download_dir)
              if D="$(Xdialog --stdout --title "$ZG24" --dselect "*" 20 55)"; then
                change DOWNLOAD_DIR "$D"
              fi
              ;;
            list_file)
              if F="$(Xdialog --stdout --title "$ZG25" --fselect "*" 20 55)"; then
                change LIST_FILE "$F"
              fi
              ;;
            download_status_file)
              if F="$(Xdialog --stdout --title "$ZG26" --fselect "*" 20 55)"; then
                change DOWNLOAD_STATUS_FILE "$F"
              fi
              ;;
            log_file)
              if F="$(Xdialog --stdout --title "$ZG27" --fselect "*" 20 55)"; then
                change LOG_FILE "$F"
              fi
              ;;
            permissions)
              if P="$(Xdialog --stdout --title "$ZG28" --inputbox "$ZG29" 0 0)"; then
                change PERMISSIONS "$P"
              fi
              ;;
          esac
          # Reload so the next menu shows the values just written.
          settings
        done
        ;;
    esac
  done
}
# Stores "variable=value" in the user configuration file, replacing any
# earlier assignment of the same variable.
# Arguments:
#   $1 - name of the setting (e.g. DOWNLOAD_DIR)
#   $2 - new value
# The configuration file is sourced by settings, so the value is written
# shell-quoted (printf %q): paths with spaces or shell metacharacters are read
# back literally instead of being split or executed.
function change {
  local variable="$1"
  local value="$2"
  local kept
  # Keep every line that is not an assignment of this exact variable.
  kept="$(grep -v -- "^${variable}=" "$CONF_FILE_USER")"
  {
    if [ -n "$kept" ]; then
      printf '%s\n' "$kept"
    fi
    printf '%s=%q\n' "$variable" "$value"
  } >"$CONF_FILE_USER"
}
# Publishes the current download state in global variables:
#   PROCESS              snapshot of "ps ax -o pid,args"
#   DOWNLOADING          "yes" when a "<script> start" process is listed
#   WGET_RUNNING         "yes" when the last log line contains MSG4
#   DOING                last log line, or ZG18 when nothing is running
#   DOWNLOAD_URI         first line of the status file
#   DOWNLOAD_SIZE(_H)    byte count and bracketed size from the first line of
#                        the status file shaped like "Word: N (SIZE) [TYPE]"
#   DOWNLOAD_PROGRESS, DOWNLOAD_PERCENTAGE, DOWNLOAD_SPEED, DOWNLOAD_TIME_LEFT
#                        fields 1, 7, 8 and 9 of the second-to-last line of
#                        the status file
function set_status {
  PROCESS="$(ps ax -o pid,args)"
  DOWNLOADING="no"
  WGET_RUNNING="no"
  DOWNLOAD_SIZE=""
  DOWNLOAD_SIZE_H=""
  DOWNLOAD_PERCENTAGE=""
  DOWNLOAD_PROGRESS=""
  DOWNLOAD_SPEED=""
  DOWNLOAD_TIME_LEFT=""
  DOWNLOAD_URI=""
  DOING="$ZG18"

  # -F: the script path is matched literally, not as a regular expression.
  printf '%s\n' "$PROCESS" | grep -qF -- "$0 start" || return 0
  DOWNLOADING="yes"
  DOWNLOAD_URI="$(head -n 1 "$DOWNLOAD_STATUS_FILE")"
  DOING="$(tail -n 1 "$LOG_FILE")"

  printf '%s\n' "$DOING" | grep -qF -- "$MSG4" || return 0
  WGET_RUNNING="yes"

  local size_line progress_line
  # [A-Za-z]: the former [A-Za-Z] is a reversed range that grep can reject.
  # Only the first matching line is used for both size values.
  size_line="$(grep -E '[A-Za-z]*: [0-9]* \(.*\) \[.*\]' "$DOWNLOAD_STATUS_FILE" | head -n 1)"
  DOWNLOAD_SIZE="$(printf '%s\n' "$size_line" | sed -e 's/^.*: //' -e 's/ .*$//')"
  DOWNLOAD_SIZE_H="$(printf '%s\n' "$size_line" | sed -e 's/^.*(//' -e 's/).*$//')"

  # The last line may still be being written; read the one before it.
  progress_line="$(tail -n 2 "$DOWNLOAD_STATUS_FILE" | head -n 1)"
  DOWNLOAD_PERCENTAGE="$(printf '%s\n' "$progress_line" | awk '{print $7}')"
  DOWNLOAD_PROGRESS="$(printf '%s\n' "$progress_line" | awk '{print $1}')"
  DOWNLOAD_SPEED="$(printf '%s\n' "$progress_line" | awk '{print $8}')"
  DOWNLOAD_TIME_LEFT="$(printf '%s\n' "$progress_line" | awk '{print $9}')"
  return 0
}
# Loads the configuration into global variables.
# Order: built-in defaults, then /etc/megaupbash.conf, then the per-user
# file (each one is sourced when readable, so later files win). Afterwards
# the list, log, status and user configuration files are created if missing
# and chmod-ed to PERMISSIONS, and the message/label strings are set in
# Spanish when LANG starts with "es", in English otherwise.
function settings {
  local path

  # Built-in defaults.
  ASCII_TABLE=' !"#$/& ()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^-`abcdefghijklmnopqrstuvwxyz'
  DOWNLOAD_DIR="$HOME"
  LIST_FILE="$HOME/.megaupbash-list"
  DOWNLOAD_STATUS_FILE="$HOME/.megaupbash.download.status"
  LOG_FILE="$HOME/.megaupbash.log"
  PERMISSIONS="600"
  CONF_FILE_GLOBAL="/etc/megaupbash.conf"
  CONF_FILE_USER="$HOME/.megaupbash.conf"

  # Overrides: system-wide file first, then the user's own file.
  [ -r "$CONF_FILE_GLOBAL" ] && . "$CONF_FILE_GLOBAL"
  [ -r "$CONF_FILE_USER" ] && . "$CONF_FILE_USER"

  # Make sure the working files exist and carry the configured mode.
  for path in "$LIST_FILE" "$LOG_FILE" "$DOWNLOAD_STATUS_FILE" "$CONF_FILE_USER"; do
    [ -e "$path" ] || echo -n >"$path"
  done
  for path in "$LIST_FILE" "$LOG_FILE" "$DOWNLOAD_STATUS_FILE" "$CONF_FILE_USER"; do
    chmod "$PERMISSIONS" "$path"
  done

  # Localized messages (MSG*, ERR*) and GUI labels (ZG*).
  case "$LANG" in
    es*)
      MSG1="Buscando enlace de descarga para"
      MSG3="Esperando que el servidor autorize la descarga"
      MSG4="Descargando"
      MSG5="Descarga completa"
      MSG6="Reintentando"
      MSG11="Lista de descargas borrada"
      ERR0="Error: No se puede determinar el enlace de descarga para"
      ERR1="Error al descargar"
      ERR2="Archivo borrado"
      ERR3="Limite de descargas excedido, esperando 4 minutos y reintentando"
      ERR4="Error: no se puede determinar cuanto tiempo esperar, esperando 300 segundos"
      ERR5="Error: Este programa ya está ejecutándose"
      ERR6="Error: Permisos insuficientes"
      ERR7="Error: no se pudo determinar el codigo captcha"
      ERR8="Error al resolver el codigo javascript"
      ZG0="MegaupBash"
      ZG1="MegaupBash GUI basado en zenity"
      ZG2="Actualizar información sobre el estado del programa"
      ZG3="Ver lista de descargas"
      ZG4="Agregar URI a la lista de descargas"
      ZG5="Borrar URI de la lista de descargas"
      ZG6="Reordenar lista de descargas"
      ZG7="Borrar lista de descargas"
      ZG8="Encontrar enlaces en página web"
      ZG9="Encontrar enlaces en archivo"
      ZG10="Detener las descargas"
      ZG11="Agregar URI a la lista de descargas. Se pueden agregar varias separadas por un espacio."
      ZG12="Mover al principio de la lista"
      ZG13="Mover la descarga"
      ZG14="Despues de"
      ZG15="Iniciar descargas"
      ZG16="Detener descargas"
      ZG17="Estado actual:"
      ZG18="Detenido"
      ZG19="Tiempo restante estimado:"
      ZG20="Velocidad de descarga:"
      ZG21="Ver log"
      ZG22="Modificar configuración"
      ZG23="Cerrar ventana"
      ZG24="Directorio de descargas"
      ZG25="Archivo de lista"
      ZG26="Archivo de estado de descarga"
      ZG27="Archivo de log"
      ZG28="Permisos para archivos"
      ZG29="Permisos para archivos. Poner este valor a 666 para permitir a otros usuarios modificar la lista. Ponerlo a 600 para denegarselo."
      ;;
    *)
      MSG1="Looking for the download link for"
      MSG3="Waiting for the server allow downloading"
      MSG4="Downloading"
      MSG5="Download complete"
      MSG6="Retrying"
      MSG11="Download list cleared"
      ERR0="Error: Can't find the download link for"
      ERR1="Error while downloading"
      ERR2="File has been deleted"
      ERR3="Download limit exceded, waiting 4 minutes and retrying"
      ERR4="Error: can't be determinated the wait time, waiting 300 seconds"
      ERR5="Error: this program is already running"
      ERR6="Error: Permission denied"
      ERR7="Error: Can't determine the captcha code"
      ERR8="Error resolving the javascript code"
      ZG0="MegaupBash"
      ZG1="MegaupBash zenity based GUI"
      ZG2="Refresh download status info"
      ZG3="View download list"
      ZG4="Add URI to the download list"
      ZG5="Delete URI from the download list"
      ZG6="Move an element in the download list"
      ZG7="Delete the download list"
      ZG8="Find links in webpage"
      ZG9="Find links in file"
      ZG10="Stop downloads"
      ZG11="Add URI to the download list. Multiple allowed"
      ZG12="Move to the begin of the list"
      ZG13="Move the download"
      ZG14="Under"
      ZG15="Start downloads"
      ZG16="Stop downloads"
      ZG17="Status:"
      ZG18="Stopped"
      ZG19="Estimated time left:"
      ZG20="Download speed:"
      ZG21="View log"
      ZG22="Settings"
      ZG23="Close window"
      ZG24="Download directory"
      ZG25="List file"
      ZG26="Download status file"
      ZG27="Log file"
      ZG28="File permissions"
      ZG29="File permissions. Set this value to 666 will allow to another users to modify de download list. Set it to 600 to deny them."
      ;;
  esac
}
# Extract the embedded captcha decoder: everything from the Python shebang
# line at the end of this file onwards. Locating the marker line (rather than
# counting a fixed number of trailing lines) keeps the extraction correct
# when lines are added or removed on either side of it.
sed -n '/^#!\/usr\/bin\/python$/,$p' "$0" > "/tmp/megaupload_captcha.py"
main "$@"
# Stop here so the shell never tries to run the Python source below.
exit
##########################################################
### A partir de esta linea es código extraído de plowshare
### http://code.google.com/p/plowshare/wiki/BreakingMegauploadCaptcha
### Gracias a los desarrolladores de plowshare
##########################################################
#!/usr/bin/python
#
# This file is part of Plowshare.
#
# Plowshare is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Plowshare is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Plowshare. If not, see <http://www.gnu.org/licenses/>.
#
# Author: Arnau Sanchez
"""
Decode the captcha used by Megaupload (2009/03/20): 4 bold characters
(letter-letter-letter-digit), rotated and overlapped.
Dependencies: Tesseract-ocr, Python Imaging Library
"""
import os
import sys
import logging
import string
import tempfile
import operator
import subprocess
from itertools import tee, izip, groupby
from StringIO import StringIO
# Third-party modules
import PIL.Image as Image
# Verbose levels
_VERBOSE_LEVELS = {
0: logging.CRITICAL,
1: logging.ERROR,
2: logging.WARNING,
3: logging.INFO,
4: logging.DEBUG,
}
# Generic functions
def replace_chars(s, table):
    """Return s with every key of table replaced by its mapped value.

    Replacements are applied one key at a time, in the dictionary's
    iteration order, each on the result of the previous one.
    """
    for key, value in table.items():
        s = s.replace(key, value)
    return s
def center_of_mass(coords):
    """Return the center of mass of coords as a list, one mean per axis.

    coords is an iterable of coordinate tuples of equal length. The mean is
    computed with "/", so integer coordinates are floor-divided when run
    under Python 2. An empty coords gives an empty list.
    """
    return [sum(lst)/len(lst) for lst in zip(*coords)]
def pairwise(iterable):
    """Yield consecutive overlapping pairs: s -> (s0,s1), (s1,s2), (s2,s3), ...

    Yields nothing for an empty or single-element iterable. (The previous
    b.next() call raised StopIteration into the caller on empty input.)
    """
    iterator = iter(iterable)
    try:
        previous = next(iterator)
    except StopIteration:
        return
    for current in iterator:
        yield previous, current
        previous = current
def distance2(vector1, vector2):
    """Return the squared Euclidean distance between vector1 and vector2.

    Both arguments are sequences of numbers of the same length; the result
    is the sum of the squared component-wise differences.
    """
    return sum((x*x) for x in map(operator.sub, vector1, vector2))
def combinations_no_repetition(seq, k):
    """Yield every combination of k elements of seq, as tuples.

    Elements keep their order in seq and no position is used twice. For
    k <= 0 a single empty tuple is yielded. seq must support slicing.
    """
    if k > 0:
        for index, x in enumerate(seq):
            # Only elements after x may complete the combination.
            for y in combinations_no_repetition(seq[index+1:], k-1):
                yield (x,)+y
    else:
        yield ()
def union_sets(sets):
    """Return a new set holding the union of all sets in the iterable sets.

    An empty iterable gives an empty set (reduce() used to raise TypeError
    for it); the input sets are not modified.
    """
    return set().union(*sets)
def segment(seq, k):
    """Yield every split of seq into k consecutive, non-empty groups.

    Each result is a tuple of k slices of seq that concatenate back to seq.
    For k <= 1 the whole sequence is yielded as the only group.
    """
    if k > 1:
        # The first group may take 1 .. len(seq)-k+1 elements so that each of
        # the remaining k-1 groups still gets at least one.
        for length in range(1, (len(seq)-k+1)+1):
            for x in segment(seq[length:], k-1):
                yield (seq[:length],) + x
    else:
        yield (seq,)
def run(command, inputdata=None):
    """Run command (an argument list) and return its standard output.

    inputdata, when given, is written to the command's standard input; a
    stdin pipe is opened for it (previously none was, so the data was never
    delivered). Standard error is captured and discarded.
    Raises AssertionError when the command exits with a non-zero status.
    """
    pipe = subprocess.PIPE
    stdin = pipe if inputdata is not None else None
    popen = subprocess.Popen(command, stdin=stdin, stdout=pipe, stderr=pipe)
    outputdata = popen.communicate(inputdata)[0]
    # Raised explicitly so the check also runs under "python -O".
    if popen.returncode != 0:
        raise AssertionError("Error running: %s" % (command,))
    return outputdata
def ocr(image):
    """OCR an image with the tesseract command and return the text.

    image must provide save(file, format=...) (a PIL image). It is written
    to a temporary TIFF file; tesseract is given that file and an output
    base name, and the matching ".txt" file is read back. Both temporary
    files are removed when they are closed.
    """
    temp_tif = tempfile.NamedTemporaryFile(suffix=".tif")
    temp_txt = tempfile.NamedTemporaryFile(suffix=".txt")
    try:
        image.save(temp_tif, format="TIFF")
        # Push buffered bytes to disk before tesseract opens the file.
        temp_tif.flush()
        # tesseract appends ".txt" to the output base name itself.
        run(["tesseract", temp_tif.name, os.path.splitext(temp_txt.name)[0]])
        result = open(temp_txt.name)
        try:
            return result.read()
        finally:
            result.close()
    finally:
        temp_tif.close()
        temp_txt.close()
def histogram(it, reverse=False):
    """Return (value, count) pairs for the elements of it, sorted by count.

    Counts are in ascending order, or descending when reverse is true.
    The sort is stable, so values with equal counts stay in ascending
    value order.
    """
    pairs = ((value, len(list(grp))) for (value, grp) in groupby(sorted(it)))
    # Index the pair instead of unpacking it in the lambda signature, which
    # is Python-2-only syntax.
    return sorted(pairs, key=lambda pair: pair[1], reverse=reverse)
def get_pair_inclussion(seq, value, pred=None):
    """Return the first consecutive pair of seq that encloses value.

    A pair (val1, val2) encloses value when
    pred(val1) <= value <= pred(val2); pred defaults to the identity.
    Returns None when no consecutive pair qualifies.
    """
    if pred is None:
        pred = lambda x: x
    for val1, val2 in pairwise(seq):
        if pred(val1) <= value <= pred(val2):
            return val1, val2
# Generic PIL functions
def smooth(image0, value):
    """Return a copy of image0 where pixels equal to value are spread.

    For every pixel of image0 holding value, the pixel to its right and the
    pixel below it (when inside the image) are set to value in the copy.
    The source image is only read, so spreading does not cascade.
    """
    image = image0.copy()
    ipimage0 = image0.load()
    ipimage = image.load()
    width, height = image.size
    for x, y in iter_image(image):
        if ipimage0[x, y] == value:
            if x+1 < width:
                ipimage[x+1, y] = value
            if y+1 < height:
                ipimage[x, y+1] = value
    return image
def merge_image_with_pixels(image0, pixels, value):
    """Return a copy of image0 with every (x, y) in pixels set to value.

    image0 itself is not modified.
    """
    image = image0.copy()
    ipimage = image.load()
    for (x, y) in pixels:
        ipimage[x, y] = value
    return image
def floodfill_image(image0, xy, fill_color, threshold=0):
    """Flood fill a copy of image0 with fill_color, starting at xy.

    xy is the (x, y) start position. A 4-connected neighbour is filled when
    its value differs from the start pixel's original value by at most
    threshold. image0 is not modified.

    Return a tuple with the filled image and the set of filled positions.
    See http://mail.python.org/pipermail/image-sig/2005-September/003559.html
    """
    # The position is unpacked here because tuple parameters in the
    # signature are Python-2-only syntax; callers still pass one tuple.
    x, y = xy
    image = image0.copy()
    width, height = image.size

    def is_within(point):
        """Return True if point (x, y) is inside the image."""
        px, py = point
        return (0 <= px < width and 0 <= py < height)

    ipimage = image.load()
    background_value = ipimage[x, y]
    ipimage[x, y] = fill_color
    edge = [(x, y)]
    filled = set(edge)
    # Breadth-first expansion: "edge" holds the pixels filled in the last
    # round, whose neighbours are examined in the next one.
    while edge:
        newedge = []
        for x, y in edge:
            for s, t in ((x+1, y), (x-1, y), (x, y+1), (x, y-1)):
                if (s, t) in filled or not is_within((s, t)):
                    continue
                pixel = ipimage[s, t]
                if abs(pixel - background_value) <= threshold:
                    ipimage[s, t] = fill_color
                    newedge.append((s, t))
        filled.update(newedge)
        edge = newedge
    return image, filled
def iter_image(image):
    """Return an iterator over every (x, y) position of image.

    Positions are produced column by column: x is the outer loop over the
    width, y the inner loop over the height (both taken from image.size).
    """
    w, h = image.size
    return ((x, y) for x in range(w) for y in range(h))
def get_zones(image, seen0, value, minpixels=1):
    """
    Scan image and yield groups (sets) of connected positions whose pixels
    equal value.

    Positions in seen0 are skipped, and so are positions of groups already
    yielded. Groups with minpixels or fewer positions are not yielded and
    are not recorded as seen. seen0 itself is not modified.
    """
    seen = seen0.copy()
    pixels = image.load()
    for x, y in iter_image(image):
        if (x, y) not in seen and pixels[x, y] == value:
            # Only the set of filled positions is needed; the filled copy of
            # the image that floodfill_image also returns is discarded.
            filled = floodfill_image(image, (x, y), 50)[1]
            if len(filled) > minpixels:
                seen.update(filled)
                yield filled
def new_image_from_pixels(pixels, value):
    """Return a new "L" image containing just the given group of pixels.

    The image is the bounding box of pixels on a background of 255, with
    every listed position (shifted so the box starts at the origin) set to
    value. pixels must not be empty.
    """
    xs, ys = zip(*pixels)
    x1, y1 = min(xs), min(ys)
    x2, y2 = max(xs), max(ys)
    image = Image.new("L", (x2-x1+1, y2-y1+1), 255)
    ipimage = image.load()
    for (x, y) in pixels:
        ipimage[x-x1, y-y1] = value
    return image
def join_images_horizontal(images):
    """Return a new "L" image with images pasted side by side, left to right.

    The result is as wide as all images together and as tall as the tallest
    one; the background is 255 and each image is centred vertically.
    images is iterated more than once, so it must be a sequence.
    """
    width = sum(i.size[0] for i in images)
    height = max(i.size[1] for i in images)
    himage = Image.new("L", (width, height), 255)
    x = 0
    for image in images:
        w, h = image.size
        # Floor division keeps the paste offset an integer on every Python
        # version.
        himage.paste(image, (x, (height - h)//2))
        x += w
    return himage
### Megaupload captcha decoder functions
def filter_word(word0):
    """Return word0 as a valid captcha (LETTER LETTER LETTER DIGIT), or None.

    The word is upper-cased and stripped of spaces and must then have exactly
    4 characters. Common OCR confusions are corrected: in the first three
    positions digits and brackets are mapped to look-alike letters, in the
    last position letters are mapped to look-alike digits.
    """
    def string2dict(s):
        """Convert pairs of chars in string to dictionary.
        Example: ('AB CD') -> {'A': 'B', 'C': 'D'}."""
        return dict(tuple(pair) for pair in s.split())

    str_digit_to_letter = "1T 2Z 4A 5S 6G 7T 8B"
    str_letter_to_letter = "{C (C [C IC"
    # ascii_uppercase does not vary with the locale, unlike string.uppercase.
    uppercase = string.ascii_uppercase
    allowed_chars = uppercase + string.digits
    digit_to_letter = string2dict(str_digit_to_letter)
    letter_to_letter = string2dict(str_letter_to_letter)
    letter_to_digit = dict((v, k) for (k, v) in digit_to_letter.items())

    wordlst1 = list(word0.upper().replace(" ", ""))
    if len(wordlst1) != 4:
        return
    wordlst2 = [replace_chars(replace_chars(w, digit_to_letter),
                              letter_to_letter) for w in wordlst1[:3]] + \
               [replace_chars(wordlst1[3], letter_to_digit)]
    # Drop anything that is still not a letter or digit; a dropped character
    # makes the length check below fail.
    wordlst = [c for c in wordlst2 if c in allowed_chars]
    if len(wordlst) == 4 and (wordlst[0] in uppercase and
                              wordlst[1] in uppercase and
                              wordlst[2] in uppercase and
                              wordlst[3] in string.digits):
        return "".join(wordlst)
def get_error(pixels_list, image):
    """Return the error of the pixel groups against the expected positions.

    For the n-th group the expected center is
    ((1.5*n+1) * width/6, height/2), with width and height taken from
    image.size. The error is the sum, over all groups, of the squared
    distance between the group's center of mass and that expected center.
    """
    width, height = image.size
    gap_width = width / 6.0

    def error_for_pixels(pixels, n):
        """Return error for pixels (character n in captcha)."""
        com_x, com_y = center_of_mass(pixels)
        return distance2((com_x, com_y), ((1.5*n+1)*gap_width, (height/2.0)))

    return sum(error_for_pixels(pxls, n) for n, pxls in enumerate(pixels_list))
def build_candidates(characters4_pixels_list, uncertain_pixels,
                     rotation=22):
    """Yield word candidates built from character and uncertain groups.

    characters4_pixels_list holds candidate segmentations, each a list of
    pixel sets (one per character). For every segmentation and every subset
    of uncertain_pixels, the chosen uncertain groups are merged into the two
    neighbouring characters whose centers enclose them horizontally; the
    characters are then rotated by +rotation / -rotation degrees
    alternately, joined, smoothed and passed to the OCR. Only texts accepted
    by filter_word are yielded.
    """
    def rotate_character(pixels, index):
        """Rotate captcha character in position index."""
        image = new_image_from_pixels(pixels, 1)
        angle = rotation * (+1 if (index % 2 == 0) else -1)
        rotated_image = image.rotate(angle, expand=True)
        # Character pixels (1) become black, everything else white.
        return rotated_image.point(lambda x: 0 if x == 1 else 255)

    for plindex, characters4_pixels in enumerate(characters4_pixels_list):
        logging.debug("Generating words (%d) %d/%d", 2**len(uncertain_pixels),
                      plindex+1, len(characters4_pixels_list))
        for length in range(len(uncertain_pixels)+1):
            for groups in combinations_no_repetition(uncertain_pixels, length):
                # Work on copies so each combination starts from the
                # original character groups.
                characters4_pixels_test = [x.copy() for x in characters4_pixels]
                for pixels in groups:
                    pair = get_pair_inclussion(characters4_pixels_test,
                                               center_of_mass(pixels)[0],
                                               pred=lambda x: center_of_mass(x)[0])
                    if not pair:
                        continue
                    char1, char2 = pair
                    char1.update(pixels)
                    char2.update(pixels)
                images = [rotate_character(pixels, cindex)
                          for cindex, pixels in enumerate(characters4_pixels_test)]
                clean_image = smooth(join_images_horizontal(images), 0)
                text = ocr(clean_image).strip()
                filtered_text = filter_word(text)
                #logging.debug("%s -> %s", text, filtered_text)
                if filtered_text:
                    yield filtered_text
def decode_megaupload_captcha(imagedata, maxiterations=1):
    """Decode a Megaupload captcha image and return its text, or None.

    imagedata is whatever Image.open accepts (a file name or file object).
    Expected 4 characters (LETTER LETTER LETTER DIGIT), rotated and
    overlapped. maxiterations limits how many of the best-ranked character
    segmentations are tried.
    """
    original = Image.open(imagedata)

    # Get background zone: the image is framed with a 1-pixel white border
    # so the flood fill from (0, 0) can reach all of the outer background.
    width, height = original.size
    image = Image.new("L", (width+2, height+2), 255)
    image.paste(original, (1, 1))
    background_pixels = floodfill_image(image, (0, 0), 155)[1]
    logging.debug("Background pixels: %d", len(background_pixels))

    # Get characters zones (value 0, more than 10 pixels), left to right.
    characters_pixels = sorted(get_zones(image, background_pixels, 0, 10),
                               key=center_of_mass)
    logging.debug("Characters: %d - %s", len(characters_pixels),
                  [len(x) for x in characters_pixels])
    if len(characters_pixels) < 4:
        logging.error("Need at least 4 characters zones in image (%d found)",
                      len(characters_pixels))
        return

    # Every way to group the zones into 4 characters, best fit first.
    characters_pixels_list0 = [[union_sets(sets) for sets in x]
                               for x in segment(characters_pixels, 4)]
    characters4_pixels_list = sorted(
        characters_pixels_list0,
        key=lambda pixels_list: get_error(pixels_list, image))[:maxiterations]

    # Get uncertain zones: value-255 areas not part of the background (more
    # than 20 pixels), smallest first.
    seen = union_sets([background_pixels] + characters_pixels)
    max_uncertain_groups = 6
    uncertain_pixels = list(sorted(get_zones(image, seen, 255, 20),
                                   key=len))[:max_uncertain_groups]
    logging.debug("Uncertain groups: %d - %s", len(uncertain_pixels),
                  [len(pixels) for pixels in uncertain_pixels])

    # Build candidates
    candidates = build_candidates(characters4_pixels_list, uncertain_pixels)

    # Return best decoded word: the most frequent character per position.
    candidates_histogram = [histogram(charpos, reverse=True) for charpos in
                            zip(*[list(candidate) for candidate in candidates])]
    if not candidates_histogram:
        logging.warning("No word candidates")
        return
    logging.info("Best words: %s", candidates_histogram)
    best = [x[0][0] for x in candidates_histogram]
    return "".join(best)
def set_verbose_level(verbose_level):
    """Configure logging for the given verbose level.

    verbose_level is clamped to the keys of _VERBOSE_LEVELS (0 up to
    len(_VERBOSE_LEVELS)-1). Messages go to standard error.
    """
    level = _VERBOSE_LEVELS[max(0, min(verbose_level, len(_VERBOSE_LEVELS)-1))]
    logging.basicConfig(level=level, stream=sys.stderr,
                        format='%(levelname)s: %(message)s')
def main(args):
    """Main function for megaupload captcha decoder.

    args is the argument list without the program name. Exactly one
    positional argument is expected: the image file name, or "-" to read the
    image from standard input. The decoded text is written to standard
    output. Returns 1 when the arguments are wrong or the captcha cannot be
    decoded, None on success.
    """
    import optparse
    usage = """usage: megaupload_captcha [OPTIONS] [IMAGE_FILE]
Decode Megaupload captcha."""
    parser = optparse.OptionParser(usage)
    parser.add_option('-v', '--verbose', dest='verbose_level',
                      action="count", default=None,
                      help='Increate verbose level (0: CRITICAL ... 4: DEBUG)')
    parser.add_option('-i', '--max-iterations', dest='max_iterations',
                      default=1, metavar='NUM', type='int',
                      help='Maximum iterations for characters agrupations')
    options, args0 = parser.parse_args(args)
    # Show the help for a missing file name and also for surplus arguments,
    # which used to fail with a ValueError while unpacking.
    if len(args0) != 1:
        parser.print_help()
        return 1
    set_verbose_level((1 if options.verbose_level is None
                       else options.verbose_level))
    filename, = args0
    logging.debug("Maximum iterations: %s" % options.max_iterations)
    if filename == "-":
        data = sys.stdin.read()
    else:
        # Image data is binary; close the file as soon as it has been read.
        stream = open(filename, "rb")
        try:
            data = stream.read()
        finally:
            stream.close()
    captcha = decode_megaupload_captcha(StringIO(data),
                                        options.max_iterations)
    if not captcha:
        logging.error("Cannot decode captcha image")
        return 1
    sys.stdout.write(captcha+"\n")
# Run the decoder when executed as a script; main's return value becomes the
# process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))