Use lambda function and sorted function to sort a complex dictionary in Python
>>> a={"a":[1,"a"], "b":[2,"b"], "c":[0,"A"], "d":[-2, "z"]}
>>> a.items()
[('a', [1, 'a']), ('c', [0, 'A']), ('b', [2, 'b']), ('d', [-2, 'z'])]
>>> sorted(a.items(), lambda x, y : cmp(x[1][0], y[1][0]))
[('d', [-2, 'z']), ('c', [0, 'A']), ('a', [1, 'a']), ('b', [2, 'b'])]
>>> sorted(a.items(), lambda x, y : cmp(x[1][1], y[1][1]))
[('c', [0, 'A']), ('a', [1, 'a']), ('b', [2, 'b']), ('d', [-2, 'z'])]
It could be useful.
A blog about the system and programming of *nix environment. It covers: * Scripts that I am familiar with include shell scripts, Perl, Python, Tcl/Tk, and PHP. * Utilities includes sed, grep, awk, cat, tac, ... * Kernel development and optimization, device driver * Networking * Toolchain, build environment
Tuesday, August 15, 2006
Thursday, March 23, 2006
A Python script to get files from FTP site easily
#!/usr/bin/python
from ftplib import FTP
import netrc
import re
import sys
# Supported sites: site id -> [FTP host name, root directory on that host].
directory = {
    "siteid": ["hostname", "root directory of your ftp site"],
    "another_site": ["another host", "another root directory"],
}

# With two or more arguments the first one is the site id and the files
# start at index 2; otherwise the default site is used and the files start
# at index 1.
if len(sys.argv) > 2:
    host = sys.argv[1]
    off_file = 2
else:
    # The default must be a key of `directory` (the usage text names
    # "siteid" as the default); the old value "hostname" was never a key,
    # so the default path always ended in "not currently supported".
    host = "siteid"
    off_file = 1

if host not in directory:
    print("%s is not currently supported" % host)
    sys.exit(1)

# we use .netrc to store userid and password info.
net = netrc.netrc()
auth = net.authenticators(directory[host][0])
if auth is None:
    # authenticators() returns None when .netrc has no entry for the host;
    # report that instead of failing on the tuple unpacking below.
    print("no .netrc entry for %s" % directory[host][0])
    sys.exit(1)
(user, acct, passwd) = auth
def main(argv):
    """Download every remote path listed in argv from the selected FTP site.

    Uses the module-level ``directory``, ``host``, ``user`` and ``passwd``
    values.  Each entry of ``argv`` must contain a "/" separating the remote
    directory (relative to the site's root directory) from the file name;
    entries without one are reported as "no matching ..".  When the directory
    part is non-empty the file is written locally under the same relative
    path, otherwise under the bare file name.
    """
    server = directory[host][0]
    # Splits "some/dir/name" into ("some/dir", "/name"); compiled once
    # instead of once per file.
    splitter = re.compile('(.*)(/[^/]+)')

    print("connecting ... %s" % server)
    ftp = FTP(server)
    try:
        print("login'ing ... %s" % server)
        ftp.login(user, passwd)
        for remote in argv:
            sys.stdout.write("getting ... " + remote)
            m = splitter.match(remote)
            if not m:
                print("no matching ..")
                continue
            path = m.group(1)
            filename = m.group(2)[1:]
            ftp.cwd(directory[host][1] + path)
            if len(path):
                local_name = remote
            else:
                local_name = filename
            out = open(local_name, 'wb')
            try:
                ftp.retrbinary("RETR " + filename, out.write)
            finally:
                # Close the local file even when the transfer fails.
                out.close()
            print(" done")
    finally:
        # Always release the connection, also after an error.
        ftp.close()
if __name__ == '__main__':
    # Without any argument there is nothing to fetch: show the usage text.
    if len(sys.argv) <= 1:
        print("Usage: get [host] file_to_be_get\nHost: siteid(default), another_site\n")
    else:
        main(sys.argv[off_file:])
from ftplib import FTP
import netrc
import re
import sys
# Supported sites: site id -> [FTP host name, root directory on that host].
directory = {
    "siteid": ["hostname", "root directory of your ftp site"],
    "another_site": ["another host", "another root directory"],
}

# With two or more arguments the first one is the site id and the files
# start at index 2; otherwise the default site is used and the files start
# at index 1.
if len(sys.argv) > 2:
    host = sys.argv[1]
    off_file = 2
else:
    # The default must be a key of `directory` (the usage text names
    # "siteid" as the default); the old value "hostname" was never a key,
    # so the default path always ended in "not currently supported".
    host = "siteid"
    off_file = 1

if host not in directory:
    print("%s is not currently supported" % host)
    sys.exit(1)

# we use .netrc to store userid and password info.
net = netrc.netrc()
auth = net.authenticators(directory[host][0])
if auth is None:
    # authenticators() returns None when .netrc has no entry for the host;
    # report that instead of failing on the tuple unpacking below.
    print("no .netrc entry for %s" % directory[host][0])
    sys.exit(1)
(user, acct, passwd) = auth
def main(argv):
    """Download every remote path listed in argv from the selected FTP site.

    Uses the module-level ``directory``, ``host``, ``user`` and ``passwd``
    values.  Each entry of ``argv`` must contain a "/" separating the remote
    directory (relative to the site's root directory) from the file name;
    entries without one are reported as "no matching ..".  When the directory
    part is non-empty the file is written locally under the same relative
    path, otherwise under the bare file name.
    """
    server = directory[host][0]
    # Splits "some/dir/name" into ("some/dir", "/name"); compiled once
    # instead of once per file.
    splitter = re.compile('(.*)(/[^/]+)')

    print("connecting ... %s" % server)
    ftp = FTP(server)
    try:
        print("login'ing ... %s" % server)
        ftp.login(user, passwd)
        for remote in argv:
            sys.stdout.write("getting ... " + remote)
            m = splitter.match(remote)
            if not m:
                print("no matching ..")
                continue
            path = m.group(1)
            filename = m.group(2)[1:]
            ftp.cwd(directory[host][1] + path)
            if len(path):
                local_name = remote
            else:
                local_name = filename
            out = open(local_name, 'wb')
            try:
                ftp.retrbinary("RETR " + filename, out.write)
            finally:
                # Close the local file even when the transfer fails.
                out.close()
            print(" done")
    finally:
        # Always release the connection, also after an error.
        ftp.close()
if __name__ == '__main__':
    # Without any argument there is nothing to fetch: show the usage text.
    if len(sys.argv) <= 1:
        print("Usage: get [host] file_to_be_get\nHost: siteid(default), another_site\n")
    else:
        main(sys.argv[off_file:])
Thursday, March 16, 2006
My user script to keep only news content in creaders.net and wenxuecity.com
// ==UserScript==
// @name Keep Only Interested Content
// @namespace http://leochen.net
// @description A script to remove all the un-necessary elements and display plain interested content only (version 0.3)
// @include http://*.wenxuecity.com/*
// @include http://*.creaders.net/*
// @include http://*.bbsland.com/*
// ==/UserScript==
/*
 * Keep only the interesting nodes of the page.
 *
 * Each target node is described by a path: the list of childNodes indexes
 * to follow starting from document.body.  The paths that apply to the
 * current URL are resolved first, then the body is emptied and the nodes
 * that were found are appended back in order.
 *
 * A path that does not exist on the page is skipped, and when nothing at
 * all was found the page is left untouched instead of being blanked.
 */
var body = document.body;
var theContent = new Array();
var numContent = 0;
var url = document.URL;
var isMessages = /messages/.test(url);

/* Follow one index path from document.body; null when a step is missing. */
function nodeAt(path) {
    var node = document.body;
    for (var k = 0; node && k < path.length; k++) {
        node = node.childNodes[path[k]];
    }
    return node || null;
}

/* Resolve a list of paths and remember every node that exists. */
function keep(paths) {
    for (var k = 0; k < paths.length; k++) {
        var node = nodeAt(paths[k]);
        if (node) {
            theContent[numContent++] = node;
        }
    }
}

/* for wenxuecity.com */
if (/wenxuecity/.test(url)) {
    if (/www/.test(url)) {
        keep([[3, 1, 10, 1, 0, 1, 0, 5, 2, 1, 2, 1, 0]]);
    } else if (/news/.test(url)) {
        keep([[3, 1, 10, 1, 0, 1, 0, 5, 2, 1, 0, 1, 6, 1, 2, 1, 1, 1, 0]]);
    }
}

/* for creaders.net */
if (/creaders\.net/.test(url)) {
    if (/headline/.test(url)) {
        keep([[1, 9, 1, 0, 3, 3]]);
    } else if (/digest/.test(url)) {
        if (/pool/.test(url)) {
            keep([[5, 13], [5, 19, 1, 2, 3]]);
        } else {
            keep([[5, 0, 8, 7]]);
        }
    } else if (/dailynews/.test(url)) {
        keep([[5, 3, 1, 0, 3, 1]]);
    } else {
        keep([[1, 13], [1, 19], [1, 23], [1, 27], [1, 31]]);
    }
}

/*
 * for bbsland.com
 * One row per board: [URL pattern, paths on a "messages" page, paths on
 * any other page].  Every row whose pattern matches the URL is applied,
 * in this order.
 */
var bbslandBoards = [
    [/bcchinese/, [[1, 11], [1, 16]],
                  [[1, 11], [1, 13], [1, 15], [1, 25], [1, 36]]],
    [/life/,      [[1, 11], [1, 14, 7]],
                  [[1, 11], [1, 13], [1, 15], [1, 20], [1, 29]]],
    [/military/,  [[1, 16], [1, 19]],
                  [[1, 16], [1, 18], [1, 22], [1, 29], [1, 33]]],
    [/general/,   [[1, 11], [1, 19]],
                  []],
    [/politics/,  [[1, 11], [1, 19]],
                  [[11]]],
    [/sports/,    [[1, 11], [1, 14]],
                  [[0, 1, 11], [0, 1, 13], [0, 1, 15], [0, 1, 22], [0, 1, 33]]],
    [/child/,     [[1, 11], [1, 14]],
                  [[1, 5], [1, 6], [1, 11], [1, 21]]],
    [/tea/,       [[1, 11], [1, 16]],
                  [[1, 11], [1, 13], [1, 15], [1, 20], [1, 31]]],
    [/joke/,      [[1, 11]],
                  [[1, 11], [1, 13], [1, 15], [1, 20], [1, 29]]],
    [/iwish/,     [[1, 11], [1, 15]],
                  [[1, 11], [1, 13], [1, 15], [1, 22], [1, 33]]],
    [/education/, [[1, 5], [1, 6, 8]],
                  [[1, 11], [1, 13], [1, 17], [1, 27], [1, 41]]],
    [/newland/,   [[1, 11], [1, 15]],
                  [[1, 11], [1, 22], [1, 33], [1, 37]]]
];
if (/bbsland\.com/.test(url)) {
    for (var b = 0; b < bbslandBoards.length; b++) {
        if (bbslandBoards[b][0].test(url)) {
            keep(isMessages ? bbslandBoards[b][1] : bbslandBoards[b][2]);
        }
    }
}

/* Only rebuild the page when there is something to show. */
if (numContent > 0) {
    /* remove all content */
    while (body.firstChild) {
        body.removeChild(body.firstChild);
    }
    /* shown only interested elements */
    for (var i = 0; i < numContent; i++) {
        body.appendChild(theContent[i]);
    }
}
// @name Keep Only Interested Content
// @namespace http://leochen.net
// @description A script to remove all the un-necessary elements and display plain interested content only (version 0.3)
// @include http://*.wenxuecity.com/*
// @include http://*.creaders.net/*
// @include http://*.bbsland.com/*
// ==/UserScript==
/*
 * Keep only the interesting nodes of the page.
 *
 * Each target node is described by a path: the list of childNodes indexes
 * to follow starting from document.body.  The paths that apply to the
 * current URL are resolved first, then the body is emptied and the nodes
 * that were found are appended back in order.
 *
 * A path that does not exist on the page is skipped, and when nothing at
 * all was found the page is left untouched instead of being blanked.
 */
var body = document.body;
var theContent = new Array();
var numContent = 0;
var url = document.URL;
var isMessages = /messages/.test(url);

/* Follow one index path from document.body; null when a step is missing. */
function nodeAt(path) {
    var node = document.body;
    for (var k = 0; node && k < path.length; k++) {
        node = node.childNodes[path[k]];
    }
    return node || null;
}

/* Resolve a list of paths and remember every node that exists. */
function keep(paths) {
    for (var k = 0; k < paths.length; k++) {
        var node = nodeAt(paths[k]);
        if (node) {
            theContent[numContent++] = node;
        }
    }
}

/* for wenxuecity.com */
if (/wenxuecity/.test(url)) {
    if (/www/.test(url)) {
        keep([[3, 1, 10, 1, 0, 1, 0, 5, 2, 1, 2, 1, 0]]);
    } else if (/news/.test(url)) {
        keep([[3, 1, 10, 1, 0, 1, 0, 5, 2, 1, 0, 1, 6, 1, 2, 1, 1, 1, 0]]);
    }
}

/* for creaders.net */
if (/creaders\.net/.test(url)) {
    if (/headline/.test(url)) {
        keep([[1, 9, 1, 0, 3, 3]]);
    } else if (/digest/.test(url)) {
        if (/pool/.test(url)) {
            keep([[5, 13], [5, 19, 1, 2, 3]]);
        } else {
            keep([[5, 0, 8, 7]]);
        }
    } else if (/dailynews/.test(url)) {
        keep([[5, 3, 1, 0, 3, 1]]);
    } else {
        keep([[1, 13], [1, 19], [1, 23], [1, 27], [1, 31]]);
    }
}

/*
 * for bbsland.com
 * One row per board: [URL pattern, paths on a "messages" page, paths on
 * any other page].  Every row whose pattern matches the URL is applied,
 * in this order.
 */
var bbslandBoards = [
    [/bcchinese/, [[1, 11], [1, 16]],
                  [[1, 11], [1, 13], [1, 15], [1, 25], [1, 36]]],
    [/life/,      [[1, 11], [1, 14, 7]],
                  [[1, 11], [1, 13], [1, 15], [1, 20], [1, 29]]],
    [/military/,  [[1, 16], [1, 19]],
                  [[1, 16], [1, 18], [1, 22], [1, 29], [1, 33]]],
    [/general/,   [[1, 11], [1, 19]],
                  []],
    [/politics/,  [[1, 11], [1, 19]],
                  [[11]]],
    [/sports/,    [[1, 11], [1, 14]],
                  [[0, 1, 11], [0, 1, 13], [0, 1, 15], [0, 1, 22], [0, 1, 33]]],
    [/child/,     [[1, 11], [1, 14]],
                  [[1, 5], [1, 6], [1, 11], [1, 21]]],
    [/tea/,       [[1, 11], [1, 16]],
                  [[1, 11], [1, 13], [1, 15], [1, 20], [1, 31]]],
    [/joke/,      [[1, 11]],
                  [[1, 11], [1, 13], [1, 15], [1, 20], [1, 29]]],
    [/iwish/,     [[1, 11], [1, 15]],
                  [[1, 11], [1, 13], [1, 15], [1, 22], [1, 33]]],
    [/education/, [[1, 5], [1, 6, 8]],
                  [[1, 11], [1, 13], [1, 17], [1, 27], [1, 41]]],
    [/newland/,   [[1, 11], [1, 15]],
                  [[1, 11], [1, 22], [1, 33], [1, 37]]]
];
if (/bbsland\.com/.test(url)) {
    for (var b = 0; b < bbslandBoards.length; b++) {
        if (bbslandBoards[b][0].test(url)) {
            keep(isMessages ? bbslandBoards[b][1] : bbslandBoards[b][2]);
        }
    }
}

/* Only rebuild the page when there is something to show. */
if (numContent > 0) {
    /* remove all content */
    while (body.firstChild) {
        body.removeChild(body.firstChild);
    }
    /* shown only interested elements */
    for (var i = 0; i < numContent; i++) {
        body.appendChild(theContent[i]);
    }
}
my .screenrc file
startup_message off # default: on
# Affects the copying of text regions
crlf off # default: off
#vbell off
vbell_msg " __bell__ ! "
defscrollback 3300 # default: 100
#nethack on
bindkey -k kI copy
bindkey "^n" screen bash
bindkey "^b" next
bindkey "^v" prev
#bindkey "^p" prev
#bindkey "^1" select 0
#bindkey "^2" select 1
#bindkey "^3" select 2
hardstatus alwayslastline " %{= wk} %c | %d.%m.%Y | %{= Bw} %w %{= dd} "
screen -t "bash" 0 bash
# Affects the copying of text regions
crlf off # default: off
#vbell off
vbell_msg " __bell__ ! "
defscrollback 3300 # default: 100
#nethack on
bindkey -k kI copy
bindkey "^n" screen bash
bindkey "^b" next
bindkey "^v" prev
#bindkey "^p" prev
#bindkey "^1" select 0
#bindkey "^2" select 1
#bindkey "^3" select 2
hardstatus alwayslastline " %{= wk} %c | %d.%m.%Y | %{= Bw} %w %{= dd} "
screen -t "bash" 0 bash
Tuesday, March 14, 2006
Tuesday, March 07, 2006
CD dos path in unix environment
I work under Windows, but I use Cygwin most of the time.
Sometimes, I have to change directory in cygwin to a path with UNC format (\\machine\path\to\xx)
I'm tired of typing the path myself, because in Unix, we have to use (//machine/path/to/xx) format.
Here comes a simple bash function to do all the conversion and change directory for me.
------------------------
# cddos: change directory to a DOS/UNC style path such as '\\machine\path\to\xx'.
# Every backslash in the first argument is replaced by a forward slash and
# the result is handed to cd.  Pass the path in single quotes so the shell
# keeps the backslashes.
function cddos () {
    if [ $# -lt 1 ]; then
        echo "Usage: cddos '\\\\your\\dos\\path'" >&2
        return 1
    fi
    local dos_path=$1
    # ${var//\\//} swaps all backslashes for slashes inside the shell (no
    # echo/sed processes), and the quotes keep paths with spaces or glob
    # characters in one piece.
    cd "${dos_path//\\//}"
}
------------------------
Usage:
cddos '\\your\dos\path'
I love Bash functions! For a small task like this, a Bash function is a better fit than an external Bash script.
Sometimes, I have to change directory in cygwin to a path with UNC format (\\machine\path\to\xx)
I'm tired of typing the path myself, because in Unix, we have to use (//machine/path/to/xx) format.
Here comes a simple bash function to do all the conversion and change directory for me.
------------------------
# cddos: change directory to a DOS/UNC style path such as '\\machine\path\to\xx'.
# Every backslash in the first argument is replaced by a forward slash and
# the result is handed to cd.  Pass the path in single quotes so the shell
# keeps the backslashes.
function cddos () {
    if [ $# -lt 1 ]; then
        echo "Usage: cddos '\\\\your\\dos\\path'" >&2
        return 1
    fi
    local dos_path=$1
    # ${var//\\//} swaps all backslashes for slashes inside the shell (no
    # echo/sed processes), and the quotes keep paths with spaces or glob
    # characters in one piece.
    cd "${dos_path//\\//}"
}
------------------------
Usage:
cddos '\\your\dos\path'
I love Bash functions! For a small task like this, a Bash function is a better fit than an external Bash script.
Thursday, March 02, 2006
A Perl oneliner to extract opcode from a formatted asm source file.
I created a simple perl script to help my co-worker to extract the opcode from some assembly source files.
-----------------------------
Sample Input:
Reset_Handler
$a
Init
0xc0200000: e59ff190 .... LDR pc,[pc,#400] ; [0xc0200198] = 0xc0200004
Instruct_2
0xc0200004: e59f0190 .... LDR r0,[pc,#400] ; [0xc020019c] = 0xc01e0000
0xc0200008: e321f0d1 ..!. MSR CPSR_c,#0xd1
Sample Output:
90
f1
9f
e5
//
90
01
9f
e5
//
d1
f0
21
e3
//
00
d0
40
e2
//
----------------------------------------------
My oneliner version:
# For every line of the form "<spaces>0xADDR: XXXXXXXX ...", print the four
# opcode bytes in reverse order, one per line, followed by a "//" line.
# -n reads the input line by line instead of slurping it into a list for map.
perl -ne 'print "$4\n$3\n$2\n$1\n//\n" if /^\s+0x\S+:\s+(\S\S)(\S\S)(\S\S)(\S\S)\s+/'
-----------------------------
Sample Input:
Reset_Handler
$a
Init
0xc0200000: e59ff190 .... LDR pc,[pc,#400] ; [0xc0200198] = 0xc0200004
Instruct_2
0xc0200004: e59f0190 .... LDR r0,[pc,#400] ; [0xc020019c] = 0xc01e0000
0xc0200008: e321f0d1 ..!. MSR CPSR_c,#0xd1
Sample Output:
90
f1
9f
e5
//
90
01
9f
e5
//
d1
f0
21
e3
//
00
d0
40
e2
//
----------------------------------------------
My oneliner version:
# For every line of the form "<spaces>0xADDR: XXXXXXXX ...", print the four
# opcode bytes in reverse order, one per line, followed by a "//" line.
# -n reads the input line by line instead of slurping it into a list for map.
perl -ne 'print "$4\n$3\n$2\n$1\n//\n" if /^\s+0x\S+:\s+(\S\S)(\S\S)(\S\S)(\S\S)\s+/'
Wednesday, February 22, 2006
Enhanced webbot to grab ads. info from vansky.com
This is an enhanced version of my webbot script to extract ad. info from vansky.com.
---------------------------------------------
#!/usr/bin/perl -w
# Hao Chen
# The purpose of this script is to extract ad info. from vansky.com website
# and write the data to grab.dat, email.dat files.
#
# grab.log file records the id of ads. grabbed to avoid redundant work.
#
use strict;
use LWP::UserAgent;

my $url_hp   = 'http://www.vansky.com/vanphp/gg/newsgroup.php';
my $url_root = 'http://www.vansky.com/vanphp/gg/shownews.php?id=';

# starting id (used only when grab.log holds no earlier record)
my $start = 50000;

# ending id (replaced by the first ad id found on the home page)
my $end = 0;

# wait seconds
my $wait = 1;
my $ua   = LWP::UserAgent->new;
$ua->agent( 'Mozilla/5.0' );
my ( $url, $req, $res );
my $verbose = 1;

# Take the id of the first shownews.php link on the home page as the last
# ad to grab.  \d+ (instead of \d*?) makes sure the id is never empty.
$req = HTTP::Request->new( GET => $url_hp );
$res = $ua->request( $req );
if ( $res->is_success )
{
    foreach ( split( "\n", $res->content ) )
    {
        if ( /pageno_c=(.*?)shownews\.php\?id=(\d+)'\)/ )
        {
            $end = $2;
            last;
        }
    }
}

# Resume from the last range recorded in grab.log.  A missing log file just
# means this is the first run, so it is no longer a fatal error.
my @log;
if ( -e 'grab.log' )
{
    open( my $log_in, '<', 'grab.log' ) or die "Can't open file grab.log\n";
    @log = <$log_in>;
    close( $log_in );
}
if ( @log && $log[-1] =~ / => (\d+) - (\d+)/ )
{
    # NOTE(review): the loop below includes $start, so the last id of the
    # previous run is fetched again - confirm whether that is intended.
    $start = $2;
} else
{
    print STDERR "brand new task: start = $start\n";
}
if ( $end > $start )
{
    print STDERR "new grab task: $start - $end\n";
} else
{
    print STDERR "no new grab tasks!\n";
    exit;
}

# Record the range of this run before starting it.
open( my $log_out, '>>', 'grab.log' ) or die "Can't open file grab.log\n";
my $currTime = localtime;
print $log_out $currTime . ' => ' . $start . ' - ' . $end . "\n";
close( $log_out );

print "######## grab $start to $end #########\n";
open( my $ad_out,    '>>', 'grab.dat' )  or die "Can't open file grab.dat\n";
open( my $email_out, '>>', 'email.dat' ) or die "Can't open file email.dat\n";
print $ad_out "##### $currTime => grab ad. $start to $end\n";
print $email_out "##### $currTime => grab email. $start to $end\n";

for ( my $id = $start; $id <= $end; $id++ )
{
    $url = $url_root . $id;
    print STDERR $url . "\n" if ( $verbose );
    $req = HTTP::Request->new( GET => $url );
    $res = $ua->request( $req );
    next unless ( $res->is_success );

    # Glue together the lines from the one containing "Address:" up to the
    # one containing "</pre>".
    my $the_ad   = '';    # defined even when no "Address:" line shows up
    my $start_ad = 0;
    foreach ( split( "\n", $res->content ) )
    {
        # Strip only a trailing CR; chop would also eat a real character
        # when the page uses plain LF line endings.
        s/\r$//;
        $start_ad = 1 if ( /Address:/ );    #found the ad. line
        $the_ad .= $_ if ( $start_ad );
        last if ( /<\/pre>/ );              #end of ad.
    }
    if ( $the_ad =~ /<font color=darkblue size=5>(.*?)<\/td>.*Author: <\/b>(.*?)<\/td>.*Email:<\/b> (.*?)<\/td>.*Tel\.:<\/b><\/td><td align=left>(.*?)<\/td>.*Address:<\/b> (.*?)<\/td>.*<pre>(.*?)<\/pre>/ )
    {
        my ( $title, $author, $email, $tel, $address, $ad ) = ( $1, $2, $3, $4, $5, $6 );
        if ( $email =~ /.+\@.+\..+/ )
        {
            $email =~ s/ //g;
            print $email_out lc( $email ) . "\n";
        }
        print STDERR $id . ' : ' . $tel . ' : ' . lc( $email ) . "\n" if ( $verbose );

        # Remove line breaks only; the old class [\n|\r] also deleted "|".
        $ad =~ s/[\r\n]//g;
        print $ad_out $id . ' : ' . $title . ' : ' . $author . ' : ' . $email . ' : ' . $tel . ' : ' . $address . ' : ' . $ad . "\n";
    }
    sleep $wait;
}
close( $ad_out );
close( $email_out );
exit;
---------------------------------------------
#!/usr/bin/perl -w
# Hao Chen
# The purpose of this script is to extract ad info. from vansky.com website
# and write the data to grab.dat, email.dat files.
#
# grab.log file records the id of ads. grabbed to avoid redundant work.
#
use strict;
use LWP::UserAgent;

my $url_hp   = 'http://www.vansky.com/vanphp/gg/newsgroup.php';
my $url_root = 'http://www.vansky.com/vanphp/gg/shownews.php?id=';

# starting id (used only when grab.log holds no earlier record)
my $start = 50000;

# ending id (replaced by the first ad id found on the home page)
my $end = 0;

# wait seconds
my $wait = 1;
my $ua   = LWP::UserAgent->new;
$ua->agent( 'Mozilla/5.0' );
my ( $url, $req, $res );
my $verbose = 1;

# Take the id of the first shownews.php link on the home page as the last
# ad to grab.  \d+ (instead of \d*?) makes sure the id is never empty.
$req = HTTP::Request->new( GET => $url_hp );
$res = $ua->request( $req );
if ( $res->is_success )
{
    foreach ( split( "\n", $res->content ) )
    {
        if ( /pageno_c=(.*?)shownews\.php\?id=(\d+)'\)/ )
        {
            $end = $2;
            last;
        }
    }
}

# Resume from the last range recorded in grab.log.  A missing log file just
# means this is the first run, so it is no longer a fatal error.
my @log;
if ( -e 'grab.log' )
{
    open( my $log_in, '<', 'grab.log' ) or die "Can't open file grab.log\n";
    @log = <$log_in>;
    close( $log_in );
}
if ( @log && $log[-1] =~ / => (\d+) - (\d+)/ )
{
    # NOTE(review): the loop below includes $start, so the last id of the
    # previous run is fetched again - confirm whether that is intended.
    $start = $2;
} else
{
    print STDERR "brand new task: start = $start\n";
}
if ( $end > $start )
{
    print STDERR "new grab task: $start - $end\n";
} else
{
    print STDERR "no new grab tasks!\n";
    exit;
}

# Record the range of this run before starting it.
open( my $log_out, '>>', 'grab.log' ) or die "Can't open file grab.log\n";
my $currTime = localtime;
print $log_out $currTime . ' => ' . $start . ' - ' . $end . "\n";
close( $log_out );

print "######## grab $start to $end #########\n";
open( my $ad_out,    '>>', 'grab.dat' )  or die "Can't open file grab.dat\n";
open( my $email_out, '>>', 'email.dat' ) or die "Can't open file email.dat\n";
print $ad_out "##### $currTime => grab ad. $start to $end\n";
print $email_out "##### $currTime => grab email. $start to $end\n";

for ( my $id = $start; $id <= $end; $id++ )
{
    $url = $url_root . $id;
    print STDERR $url . "\n" if ( $verbose );
    $req = HTTP::Request->new( GET => $url );
    $res = $ua->request( $req );
    next unless ( $res->is_success );

    # Glue together the lines from the one containing "Address:" up to the
    # one containing "</pre>".
    my $the_ad   = '';    # defined even when no "Address:" line shows up
    my $start_ad = 0;
    foreach ( split( "\n", $res->content ) )
    {
        # Strip only a trailing CR; chop would also eat a real character
        # when the page uses plain LF line endings.
        s/\r$//;
        $start_ad = 1 if ( /Address:/ );    #found the ad. line
        $the_ad .= $_ if ( $start_ad );
        last if ( /<\/pre>/ );              #end of ad.
    }
    if ( $the_ad =~ /<font color=darkblue size=5>(.*?)<\/td>.*Author: <\/b>(.*?)<\/td>.*Email:<\/b> (.*?)<\/td>.*Tel\.:<\/b><\/td><td align=left>(.*?)<\/td>.*Address:<\/b> (.*?)<\/td>.*<pre>(.*?)<\/pre>/ )
    {
        my ( $title, $author, $email, $tel, $address, $ad ) = ( $1, $2, $3, $4, $5, $6 );
        if ( $email =~ /.+\@.+\..+/ )
        {
            $email =~ s/ //g;
            print $email_out lc( $email ) . "\n";
        }
        print STDERR $id . ' : ' . $tel . ' : ' . lc( $email ) . "\n" if ( $verbose );

        # Remove line breaks only; the old class [\n|\r] also deleted "|".
        $ad =~ s/[\r\n]//g;
        print $ad_out $id . ' : ' . $title . ' : ' . $author . ' : ' . $email . ' : ' . $tel . ' : ' . $address . ' : ' . $ad . "\n";
    }
    sleep $wait;
}
close( $ad_out );
close( $email_out );
exit;
Tuesday, February 21, 2006
A script to extract email address from vansky.com website.
This script demonstrates how to use LWP::UserAgent to extract useful information, such as email addresses, from a website.
g.pl
#!/usr/bin/perl -w
# Hao Chen
# Collect the email addresses shown on vansky.com ad pages.
#
# Every ad id from $start up to (but not including) $end is fetched; the
# first "Email:" cell of each page is counted when it looks like an address.
# At the end each distinct address is printed to STDOUT, and its count to
# STDERR.
#
use strict;
use LWP::UserAgent;

my $url_root = 'http://www.vansky.com/vanphp/gg/shownews.php?id=';

# first id to fetch
my $start = 10600;

# id to stop at (exclusive)
my $end = 12000;

# pause in seconds after each successful fetch
my $wait = 1;

my $ua = LWP::UserAgent->new;
$ua->agent( 'Mozilla/5.0' );
my ( $url, $req, $res, %emails, $email );
my $verbose = 1;

foreach my $id ( $start .. $end - 1 )
{
    $url = $url_root . $id;
    print STDERR $url . "\n" if ( $verbose );
    $req = HTTP::Request->new( GET => $url );
    $res = $ua->request( $req );
    next unless ( $res->is_success );

    foreach my $line ( split( "\n", $res->content ) )
    {
        # Only the first line carrying an Email cell is of interest.
        next unless ( $line =~ /Email:<\/b> ([^<>\/]*)<\/td><\/tr><tr>/ );
        $email = $1;
        if ( $email =~ /.+\@.+\..+/ )
        {
            print STDERR $email . "\n" if ( $verbose );
            $emails{ $email }++;
        }
        last;
    }
    sleep $wait;
}

print "######## email from $start to $end #########\n";
for my $addr ( keys %emails )
{
    print STDERR "$addr => $emails{$addr}\n";
    print $addr . "\n";
}
Monday, February 20, 2006
Cool Dynamic Bash Prompt
Put the following code in your .bashrc and you will see a lovely prompt.
The only problem is that it is a bit slow with the external "date" program.
You may write your own small apps to print out dynamic Bash prompt.
-----------------------------
function smiley () {
echo -e ":\\$(($??50:51))"
}
function thetime () {
echo -e `date +%H:%M`
}
export PS1="\$(smiley) \$(thetime) \w > "
The only problem is that it is a bit slow with the external "date" program.
You may write your own small apps to print out dynamic Bash prompt.
-----------------------------
# Print ":)" when the previous command succeeded and ":(" when it failed.
# The characters are written literally: `echo -e` only documents octal
# escapes of the form \0nnn, so the earlier "\50"/"\51" form is not portable
# across bash versions.
function smiley () {
    # $? must be captured first; any other command would overwrite it.
    local status=$?
    if [ "$status" -ne 0 ]; then
        echo ":("
    else
        echo ":)"
    fi
}
# Print the current time as HH:MM (24-hour clock).
function thetime () {
    date +%H:%M
}
# The \$ keeps the substitutions unexpanded here, so they are re-evaluated
# each time the prompt is drawn. smiley comes first so that it still sees the
# exit status of the command the user just ran.
export PS1="\$(smiley) \$(thetime) \w > "
Thursday, February 09, 2006
Perl HowTo: determine the Operating system
I was trying to figure out which operating system my Perl script is running on.
I found the following URL provides all the necessary info.
http://alma.ch/perl/perloses.htm
To be short, as quoted in the page.
"As of version 5.002, Perl is built with a $^O variable that indicates the operating system it was built on. This was implemented to help speed up code that would otherwise have to use Config; and use the value of $Config{'osname'}. Of course, to get detailed information about the system, looking into %Config is certainly recommended."
I found the following URL provides all the necessary info.
http://alma.ch/perl/perloses.htm
To be short, as quoted in the page.
"As of version 5.002, Perl is built with a $^O variable that indicates the operating system it was built on. This was implemented to help speed up code that would otherwise have to use Config; and use the value of $Config{'osname'}. Of course, to get detailed information about the system, looking into %Config is certainly recommended."
Wednesday, February 08, 2006
Debugging in Perl
I usually use "perl -d your_script script_parameters" to debug my Perl scripts.
Some commonly used debug commands are:
* b [line number | function name]
create a breakpoint in a line or a function
* B [breakpoint number]
remove breakpoint
* l
list the content of the script
* L
list breakpoints
* n
step over subroutine
* s
single step
* <Enter> (an empty line)
repeat the last n or s command
* p [variable name | statement ]
print the value of a variable or the result of a statement
* c
continue running till next breakpoint
* r
start to run
* q
quit the debugger
Some commonly used debug commands are:
* b [line number | function name]
create a breakpoint in a line or a function
* B [breakpoint number]
remove breakpoint
* l
list the content of the script
* L
list breakpoints
* n
step over subroutine
* s
single step
* <Enter> (an empty line)
repeat the last n or s command
* p [variable name | statement ]
print the value of a variable or the result of a statement
* c
continue running till next breakpoint
* r
start to run
* q
quit the debugger
Retrieve current playing from WCPE.ORG
This script displays the piece currently playing on wcpe.org, an online classical radio station.
This is my second version using LWP and sleep function.
My previous version uses "wget" and alarm signal handler.
I also had a Python version, doing the same thing.
I like this better because it is relatively simple and more clear.
The very first version was created more than two years ago, and I have rewritten it a couple of times for various reasons.
I added command-line option support today and finally decided to publish it.
The html code is generated by PerlTidy, which is one of my favorite Perl tools, used to format your Perl code and generate readable HTML code for your Perl script for publishing.
btw: PerlTidy is named after the HTML Tidy tool, which is a similar tool to format the html source.
Another similar source code beautifier tool is BCPP, used to beautify your C/C++ code.
wcpe.pl
#!/usr/bin/perl -w
# Hao Chen (lcheu@cs.sfu.ca)
#
# The purpose of this script is to retrieve the current playing info. from wcpe.org's website.
# The default behavior is to retrieve the current playing each time it changes.
#
use strict;
use LWP::UserAgent;
# Base URL of the station's per-day playlist pages; MAIN appends
# "<weekday>.shtml" to it.
my $url = 'http://theclassicalstation.org/music/';
# Page names for each day of the week, Monday first.
my $weekdays = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'];
# Full URL of the playlist page to fetch; set by MAIN before each fetch.
my $today;
# When true, retrieve_playing prints every parsed row and every line that
# did not match the playlist pattern.
my $debug = 0;
# When true, prompt_message shows messages with xmessage; otherwise it
# prints them to standard output.
my $xmessage = 1;
# Fields of the last playlist row seen whose start time is before the
# current time, as tracked by retrieve_playing.
my ($start_time, $composer, $title, $performer, $record);
# $wait: seconds until the next listed piece starts (set by retrieve_playing).
# $msg: text handed to prompt_message.
# $hour/$minute: the current time as computed by MAIN.
my ($wait, $msg, $hour, $minute);
# HTTP client plus the most recent request/response, shared by the subs.
my ($ua, $req, $res);
$ua = LWP::UserAgent->new;
# NOTE(review): the trailing space looks deliberate -- LWP is documented to
# append its own "libwww-perl/x.y" token when the agent string ends in a
# space; confirm against the installed LWP version.
$ua->agent("LWP/0.1 ");
# Fetch the playlist page named by $today and walk its rows in order.
# Rows that start before the current time ($hour:$minute) are remembered as
# the piece now playing. At the first row that starts at or after the current
# time, $wait is set to the number of seconds until that row starts, the
# remembered piece is shown through prompt_message, and the scan stops.
# On an HTTP failure the status line is printed and nothing else changes.
sub retrieve_playing
{
    $req = HTTP::Request->new(GET => $today);
    $res = $ua->request($req);

    unless ($res->is_success) {
        print $res->status_line, "\n";
        return;
    }

    for my $line (split("\n", $res->content)) {
        # One playlist row: start time, composer, title, performers, label.
        my @row = $line =~ m@.*<td>(\d{1,2}):(\d\d)</td>.*Buy Now!</a></td> <td>(.*)</td> <td>(.*)</td> <td>(.*)</td> <td>(.*)</td> <td>[\d /\.\-CD]*</td> <td>\d*</td>@;
        unless (@row) {
            print "NOT MATCH: ". $line . "\n" if ($debug);
            next;
        }

        my ($row_hour, $row_min, $row_composer, $row_title, $row_performer, $row_record) = @row;

        if ($debug) {
            print "..................\n";
            print "start time: $row_hour:$row_min\n";
            print "composer: $row_composer\n";
            print "title: $row_title\n";
            print "performers: $row_performer\n";
            print "record label: $row_record\n";
        }

        # This row has not started yet, so the previously remembered row is
        # the one playing now.
        if (($hour*60 + $minute) <= ($row_hour * 60 + $row_min)) {
            if ($debug) {
                print "******************\n";
                print "$row_hour:$row_min\n";
                printf "start time: %40s\n", $start_time;
                printf "composer: %40s\n", $composer;
                printf "title: %40s\n", $title;
                printf "performers: %40s\n", $performer;
                printf "record label: %40s\n", $record;
            }
            $wait = int(($row_hour - $hour) * 3600 + ($row_min - $minute) * 60);
            $msg = "$composer\n$title\n$performer\nRemaining:$wait seconds";
            prompt_message($msg);
            last;
        }

        # Row already started: remember it as the current candidate.
        $start_time = "$row_hour:$row_min";
        $composer = $row_composer;
        $title = $row_title;
        $performer = $row_performer;
        $record = $row_record;
    }
    return;
}
# Show a message to the user: in an xmessage window (centred, closing after
# 10 seconds) when $xmessage is true, otherwise on standard output.
# Dies if the xmessage command cannot be run or exits with a non-zero status.
sub prompt_message
{
    my $text = shift;

    unless ($xmessage) {
        print $text . "\n";
        return;
    }

    # List form of system(): the message is passed as a single argument and
    # never goes through a shell.
    my @argv = ("/usr/X11R6/bin/xmessage", $text, "-center", "-timeout", 10);
    my $status = system(@argv);
    die "system @argv failed: $?" if $status != 0;
}
# Print the command-line help text to standard output and exit the program.
sub print_usage
{
    my @help = (
        "Hao Chen (c) 2004-2006. All rights reserved.",
        "Usage: perl wcpe.pl [Options]",
        "Options:",
        "-h | -help Print this help",
        "-d | -debug In debug mode",
        "-x | -xmessage Use xmessage to display info. (default)",
        "-nox | -noxmessage Don't use xmessage, display info. to console",
        "-1 | -once Run only once (default behavior is loop)",
    );
    print join("\n", @help) . "\n";
    exit;
}
# Entry point: read the command-line switches, then repeatedly look up the
# piece that is playing and sleep until shortly after the next one starts.
MAIN:
{
    my $once = 0;

    # Consume every argument. Testing definedness rather than truth keeps the
    # loop from stopping early on an argument such as "0" or "".
    # Unrecognised arguments are ignored.
    while (defined(my $arg = shift @ARGV)) {
        if ($arg eq "-h" || $arg eq "-help") {
            print_usage();
        } elsif ($arg eq "-d" || $arg eq "-debug") {
            $debug = 1;
        } elsif ($arg eq "-x" || $arg eq "-xmessage") {
            $xmessage = 1;
        } elsif ($arg eq "-nox" || $arg eq "-noxmessage") {
            $xmessage = 0;
        } elsif ($arg eq "-1" || $arg eq "-once") {
            $once = 1;
        }
    }

    while (1) {
        # The playlist is looked up three hours ahead of the local clock.
        # Shifting the timestamp (instead of adding 3 to the hour) keeps the
        # hour within 0..23 and moves the weekday along with it, so the last
        # three hours of the local day select the next day's page.
        my ($min, $hr, $wday) = (localtime(time + 3 * 3600))[1, 2, 6];
        $hour   = $hr;
        $minute = $min;

        # localtime numbers weekdays from Sunday = 0, while @$weekdays starts
        # with Monday.
        $today = $url . $weekdays->[($wday + 6) % 7] . '.shtml';

        # Reset the delay so that a failed fetch or an unparsable page does
        # not reuse a stale value or leave it undefined; the loop then
        # simply retries after the 180 second margin.
        $wait = 0;
        retrieve_playing();
        last if ($once);

        # Wake up three minutes after the next piece is scheduled to start.
        sleep($wait + 180);
    }
}
exit;
Friday, February 03, 2006
Some DOS scripts
I recently had to use some DOS commands to do my work. (The stupid Windows environment.)
This reminded me of my very first days of using computers, when I was at university in 1993.
Crazy for knowledge, we bought books about DOS commands and tried to memorize most of the commands and their options. We even bragged to each other about knowing more commands and command-line options. However, without enough knowledge of operating systems, it was sometimes really hard to understand the concepts beneath those commands.
Return to my topic.
I found that we can use the following command sequence in DOS to debug the execution of your DOS program running in a subprocess, such as using system function in Perl or os.system function in Python.
cmd /K "the_program && pause && exit 0"
/K means the DOS window will remain open after the program execution.
Why not just use
cmd /K "the_program" to execute your program.
The problem with the latter is:
if your program executes successfully, but the DOS window is closed by clicking the "close" button at the right corner of the title bar, your parent process will receive a False return value.
By using the first one, if the program executes successfully, user will see a "press enter to continue ..." message at the end of the execution. User may check the execution results and press enter to exit, then the return value to parent process is 0. If the program aborted, user may use the "close" button to close the DOS window and the parent process will receive the "anticipated" False value. In this case, user can see the program execution procedure while the parent process will get correct return values.
One caveat, if users still use the "close" button to close the DOS window even when they see the "press enter to continue ..." message, the parent process will still receive a False value. Sigh ... Stupid ... you know ...
This remind me the very first days of using computers when I was in university in 1993.
Crazy for knowledge, we bought books about DOS commands and tried to remember most of the commands and their options. We even brag to others for knowing more commands and command line options. However, without enough knowledge of operation system, it is sometimes really hard to understand the concepts beneath those commands.
Return to my topic.
I found that we can use the following command sequence in DOS to debug the execution of your DOS program running in a subprocess, such as using system function in Perl or os.system function in Python.
cmd /K "the_program && pause && exit 0"
/K means the DOS window will remain open after the program execution.
Why not just use
cmd /K "the_program" to execute your program.
The problem with the latter is:
if your program executes successfully, but the DOS window is closed by clicking the "close" button at the right corner of the title bar, your parent process will receive a False return value.
By using the first one, if the program executes successfully, user will see a "press enter to continue ..." message at the end of the execution. User may check the execution results and press enter to exit, then the return value to parent process is 0. If the program aborted, user may use the "close" button to close the DOS window and the parent process will receive the "anticipated" False value. In this case, user can see the program execution procedure while the parent process will get correct return values.
One caveat, if users still use the "close" button to close the DOS window even when they see the "press enter to continue ..." message, the parent process will still receive a False value. Sigh ... Stupid ... you know ...
Bash Tips (bash options)
You can set the following bash options in your .bashrc.
Those are my favorite settings and will boost your efficiency of bash usage.
#This one will check the spelling of the path you typed in "cd" command and fixes
#the typo automatically.
#For example, "cd ttmp" will lead you to "tmp" directory, if it exists.
#You can use Bash variables to create path shortcuts.
#For example, if you have this line in your .bashrc
#"export t=/home/myhome/test/longname".
#Then, in your shell, you can use "cd t", to change directory to the $t
#I created a lot of Bash shortcuts in my .bashrc
#Sometimes you may notice a pause, followed by a prompt asking whether to list every possible command,
#when you press "TAB" on an empty command line. It may be annoying (at least for me).
#If you turn on this option, bash will not attempt command completion when you press "TAB" on an empty command line.
Those are my favorite settings and will boost your efficiency of bash usage.
# Correct minor spelling mistakes in the directory name given to "cd".
# For example, "cd ttmp" takes you to the "tmp" directory, if it exists.
shopt -s cdspell
# Let "cd" treat an argument that is not a directory as the name of a
# variable holding the real path, which gives you path shortcuts.
# For example, with this line in your .bashrc:
#   export t=/home/myhome/test/longname
# typing "cd t" changes to the directory stored in $t.
# I have created a lot of such shortcuts in my .bashrc.
shopt -s cdable_vars
# Pressing "TAB" on an empty command line normally makes bash pause and then
# ask whether to list every possible command, which can be annoying.
# With this option set, bash does not attempt command completion on an
# empty line.
shopt -s no_empty_cmd_completion
Subscribe to:
Posts (Atom)