a side-by-side reference sheet
sheet one: grammar and invocation | variables and expressions | arithmetic and logic | strings | regexes | dates and time | arrays | dictionaries | functions | execution control | exceptions | concurrency
sheet two: file handles | files | file formats | directories | processes and environment | option parsing | libraries and namespaces | objects | inheritance and polymorphism | reflection | net and web | unit tests | debugging and profiling | java interop
file handles | ||||
---|---|---|---|---|
node | ruby | |||
standard file handles | process.stdin process.stdout process.stderr |
$stdin $stdout $stderr | ||
read line from stdin |
line = gets | |||
end-of-file behavior | returns non-empty string without newline or raises EOFError | |||
chomp |
line.chomp! | |||
write line to stdout |
console.log("Hello, World"); | puts "Hello, World!" | ||
write formatted string to stdout | printf("%.2f\n", Math::PI) | |||
open file for reading |
f = File.open("/etc/hosts") | |||
open file for writing |
var f = fs.openSync("/tmp/test", "w"); | f = File.open("/tmp/test", "w") | ||
set file handle encoding | fin = File.open("/tmp/foo", "r:utf-8") fout = File.open("/tmp/bar", "w:utf-8") |
|||
open file for appending | f = File.open("/tmp/err.log", "a") | |||
close file |
f.close | |||
close file implicitly | File.open("/tmp/test", "w") do |f| f.puts("lorem ipsum") end |
|||
i/o error | raise IOError or subclass of SystemCallError exception | |||
encoding error | ||||
read line |
f.gets | |||
iterate over file by line |
f.each do |line| print(line) end |
|||
read file into array of strings | a = f.lines.to_a | |||
read file into string | var fs = require('fs'); var s = fs.readFileSync('/etc/hosts', 'utf8'); |
s = f.read | ||
write string |
f.write("lorem ipsum") | |||
write line |
f.puts("lorem ipsum") | |||
flush file handle |
f.flush | |||
end-of-file test |
f.eof? | |||
file handle position get, set |
f.tell f.seek(0) f.pos f.pos = 0 |
|||
open temporary file | require 'tempfile' f = Tempfile.new('') f.puts "lorem ipsum" puts "tmp file: #{f.path}" f.close file is removed when file handle is garbage-collected or interpreter exits |
|||
in memory file | require 'stringio' f = StringIO.new f.puts("lorem ipsum") f.rewind s = f.read |
|||
files | ||||
node | ruby | |||
file exists test, file regular test |
var fs = require('fs'); var qry = fs.existsSync('/etc/hosts'); var stat = fs.statSync('/etc/hosts'); var qry2 = stat.isFile(); |
File.exists?("/etc/hosts") File.file?("/etc/hosts") |
||
file size |
var fs = require('fs'); var stat = fs.statSync('/etc/hosts'); var sz = stat.size; |
File.size("/etc/hosts") | ||
is file readable, writable, executable | File.readable?("/etc/hosts") File.writable?("/etc/hosts") File.executable?("/etc/hosts") |
|||
set file permissions |
var fs = require('fs'); fs.chmodSync('/tmp/foo', parseInt('755', 8)); |
File.chmod(0755, "/tmp/foo") | ||
last modification time | var fs = require('fs'); var stat = fs.statSync('/etc/hosts'); var dt = stat.mtime; |
# Time object: t2 = File.stat('/etc/passwd').mtime # unix epoch: t = t2.to_i |
||
copy file, remove file, rename file | # npm install fs-extra var fs = require('fs-extra'); fs.copySync('/tmp/foo', '/tmp/bar'); fs.unlinkSync('/tmp/foo'); fs.renameSync('/tmp/bar', '/tmp/foo'); |
require 'fileutils' FileUtils.cp("/tmp/foo", "/tmp/bar") FileUtils.rm("/tmp/foo") FileUtils.mv("/tmp/bar", "/tmp/foo") |
||
create symlink, symlink test, readlink | var fs = require('fs'); fs.symlinkSync('/etc/hosts', '/tmp/hosts'); var stat = fs.lstatSync('/tmp/hosts'); stat.isSymbolicLink(); var path = fs.readlinkSync( '/tmp/hosts'); |
File.symlink("/etc/hosts", "/tmp/hosts") File.symlink?("/tmp/hosts") File.readlink("/tmp/hosts") |
||
generate unused file name | ||||
file formats | ||||
node | ruby | |||
parse csv | var fs = require('fs'); # npm install csv var csv = require('csv'); var s = fs.readFileSync('no-header.csv'); var a; csv().from.string(s).to.array(function(d) { a = d }); |
require 'csv' CSV.foreach("foo.csv") do |row| puts row.join("\t") end |
||
generate csv | # npm install csv var csv = require('csv'); var a = [['one', 'une', 'uno'], ['two', 'deux', 'dos']]; var s; csv().from.array(a).to.string(function (o) { s = o; }); |
require 'csv' CSV.open("foo.csv", "w") do |csv| csv << ["one", "une", "uno"] csv << ["two", "deux", "dos"] end |
||
parse json | var s1 = '{"t":1,"f":0}'; var d1 = JSON.parse(s1); |
require 'json' d = JSON.parse('{"t":1,"f":0}') |
||
generate json | var d2 = {'t': 1, 'f': 0}; var s2 = JSON.stringify(d2); |
require 'json' s = {'t' => 1,'f' => 0}.to_json |
||
parse yaml | # sudo gem install safe_yaml require 'safe_yaml' data = YAML.safe_load("---\nt: 1\nf: 0\n") |
|||
generate yaml | # sudo gem install safe_yaml require 'safe_yaml' s = YAML.dump({'t' => 1, 'f' => 0}) |
|||
parse xml all nodes matching xpath query; first node matching xpath query |
# npm install xmldom xpath var dom = require('xmldom').DOMParser; var xpath = require('xpath'); var xml = '<a><b><c ref="3">foo</c></b></a>'; var doc = new dom().parseFromString(xml); var nodes = xpath.select('/a/b/c', doc); nodes.length; nodes[0].firstChild.data; |
require 'rexml/document' include REXML xml = "<a><b><c ref='3'>foo</c></b></a>" # raises REXML::ParseException if # not well-formed: doc = Document.new(xml) nodes = XPath.match(doc,"/a/b/c") puts nodes.size puts nodes[0].text node = XPath.first(doc,"/a/b/c") puts node.text puts node.attributes["ref"] |
||
generate xml | # npm install xmlbuilder var builder = require('xmlbuilder'); var xml = builder.create('a').ele('b', {id: 123}, 'foo').end(); |
# gem install builder require 'builder' builder = Builder::XmlMarkup.new xml = builder.a do |child| child.b("foo", :id=>"123") end # <a><b id="123">foo</b></a>: puts xml |
||
parse html | # gem install nokogiri require 'nokogiri' html = File.open("foo.html").read doc = Nokogiri::HTML(html) doc = doc.xpath("//a").each do |link| puts link["href"] end |
|||
directories | ||||
node | ruby | |||
working directory | var old_dir = process.cwd(); process.chdir("/tmp"); |
old_dir = Dir.pwd Dir.chdir("/tmp") |
||
build pathname | File.join("/etc", "hosts") | |||
dirname and basename | File.dirname("/etc/hosts") File.basename("/etc/hosts") |
|||
absolute pathname and tilde expansion |
# symbolic links are not resolved: File.expand_path("foo") File.expand_path("/foo") File.expand_path("../foo") File.expand_path("./foo") File.expand_path("~/foo") |
|||
iterate over directory by file | Dir.open("/etc").each do |file| puts file end |
|||
glob paths | Dir.glob("/etc/*").each do |path| puts path end |
|||
make directory | require 'fileutils' FileUtils.mkdir_p("/tmp/foo/bar") |
|||
recursive copy | require 'fileutils' FileUtils.cp_r("/tmp/foodir", "/tmp/bardir") |
|||
remove empty directory | File.rmdir("/tmp/foodir") | |||
remove directory and contents | require 'fileutils' FileUtils.rm_rf("/tmp/foodir") |
|||
directory test |
File.directory?("/tmp") | |||
generate unused directory | require 'tmpdir' path = Dir.mktmpdir("/tmp/foo") |
|||
system temporary file directory | require 'tmpdir' Dir.tmpdir |
|||
processes and environment | ||||
node | ruby | |||
command line arguments and script name |
process.argv.slice(2) process.argv[1] // process.argv[0] contains "node" |
ARGV $PROGRAM_NAME |
||
environment variable get, set |
process.env["HOME"] process.env["PATH"] = "/bin"; |
ENV["HOME"] ENV["PATH"] = "/bin" |
||
get pid, parent pid | process.pid none |
Process.pid Process.ppid |
||
user id and name | require 'etc' Process.uid Etc.getpwuid(Process.uid)["name"] |
|||
exit |
process.exit(0); | exit(0) | ||
set signal handler |
Signal.trap("INT", lambda do |signo| puts "exiting..." exit 1 end ) |
|||
executable test |
File.executable?("/bin/ls") | |||
external command |
unless system("ls -l /tmp") raise "ls failed" end |
|||
shell-escaped external command |
path = gets path.chomp! unless system("ls", "-l", path) raise "ls failed" end |
|||
command substitution |
files = `ls -l /tmp` unless $?.success? raise "ls failed" end files = %x(ls) unless $?.success? raise "ls failed" end |
|||
option parsing | ||||
node | ruby | |||
command line options boolean option, option with argument, usage |
require 'optparse' options = {} OptionParser.new do |opts| opts.banner = "usage: #{$0} [OPTIONS] [ARG ...]" opts.on("-f", "--file FILE") do |arg| options[:file] = arg end opts.on("-v", "--verbose") do |arg| options[:verbose] = arg end end.parse! file = options[:file] verbose = options[:verbose] # The flags -h and --help and the # usage message are generated # automatically. # # After calling OptionParser.parse! only # positional arguments are in ARGV. # # Options can follow positional args. |
|||
libraries and namespaces | ||||
node | ruby | |||
load library |
require 'foo.rb' # searches $LOAD_PATH for foo.rb, foo.so, # foo.o, foo.dll: require 'foo' |
|||
load library in subdirectory | require 'foo/bar.rb' require 'foo/bar' |
|||
hot patch |
load 'foo.rb' | |||
load error | raises LoadError if library not found; exceptions generated when parsing library propagate to client | |||
main routine in library | if $PROGRAM_NAME == __FILE__ code end |
|||
library path | # $: is synonym for $LOAD_PATH: $LOAD_PATH $LOAD_PATH << "/some/path" |
|||
library path environment variable | $ RUBYLIB=~/lib ruby foo.rb | |||
library path command line option | $ ruby -I ~/lib foo.rb | |||
simple global identifiers | variables which start with $ | |||
multiple label identifiers | constants, classes, and modules | |||
label separator |
Foo::Bar.baz | |||
root namespace definition | # outside of class or module; only # constants in root namespace: FOO = 3 # inside class or module: ::FOO = 3 |
|||
namespace declaration |
class Foo # class definition end module Foo # module definition end |
|||
child namespace declaration | module Foo::Bar # module definitions end module Foo module Bar # module definitions end end # classes can nest inside classes or # modules; modules can nest in classes |
|||
namespace alias | Fu = Foo.dup include Fu |
|||
unqualified import of namespace |
# inside class or module: include Foo |
|||
unqualified import of all subnamespaces | ||||
unqualified import of definitions |
none | |||
list installed packages, install a package |
$ gem list $ gem install rails |
|||
package specification format | in foo.gemspec: spec = Gem::Specification.new do |s| s.name = "foo" s.authors = "Joe Foo" s.version = "1.0" s.summary = "a gem" s.files = Dir["lib/*.rb"] end |
|||
objects | ||||
javascript | ruby | |||
define class |
function Int(i) { this.value = i === undefined ? 0 : i; } |
class Int attr_accessor :value def initialize(i=0) @value = i end end |
||
create object |
var i = new Int(); var i2 = new Int(7); |
i = Int.new i2 = Int.new(7) |
||
instance variable visibility | public | private by default; use attr_reader, attr_writer, attr_accessor to make public | ||
get and set instance variable |
var v = i.value; i.value = v + 1; |
v = i.value i.value = v + 1 |
||
define method |
/* inside constructor: */ this.plus = function(v) { return this.value + v; }; |
def plus(i) value + i end |
||
invoke method |
i.plus(3); | i.plus(7) | ||
define class method | class Foo def Foo.one puts "one" end end |
|||
invoke class method |
Foo.one | |||
define class variable | class Foo @@instances = 1 end |
|||
get and set class variable | class Foo def initialize @@instances += 1 end end |
|||
handle undefined method invocation |
def method_missing(name, *a) puts "no def: #{name}" + " arity: #{a.size}" end |
|||
alias method | class Point attr_reader :x, :y, :color alias_method :colour, :color def initialize(x, y, color=:black) @x, @y = x, y @color = color end end |
|||
destructor |
val = i.value ObjectSpace.define_finalizer(i) { puts "bye, #{val}" } |
|||
inheritance and polymorphism | ||||
javascript | ruby | |||
subclass |
class Counter < Int @@instances = 0 def initialize @@instances += 1 super end def incr self.value += 1 end def self.instances @@instances end end |
|||
mixin | ||||
overload operator | class Fixnum def /(n) self.fdiv(n) end end |
|||
reflection | ||||
javascript | ruby | |||
object id |
none | o.object_id | ||
inspect type |
typeof([]) === 'object' | [].class == Array | ||
basic types | number string boolean undefined function object // these evaluate as 'object': typeof(null) typeof([]) typeof({}) |
NilClass TrueClass FalseClass Fixnum Bignum Float String Regexp Time Array Hash Object File |
||
inspect class | // returns prototype object: Object.getPrototypeOf(o) |
o.class == Foo o.instance_of?(Foo) |
||
inspect class hierarchy | var pa = Object.getPrototypeOf(o) // prototype of the prototype object: var grandpa = Object.getPrototypeOf(pa) |
o.class.superclass o.class.included_modules |
||
has method? |
o.reverse && typeof(o.reverse) === 'function' | o.respond_to?("reverse") | ||
message passing |
not a standard feature | (1..9).each do |i| o.send("phone#{i}=", nil) end |
||
eval |
eval('1 + 1') | loop do puts eval(gets) end |
||
list object methods |
o.methods | |||
list object attributes |
o.instance_variables | |||
list loaded libraries | # relative to directory in lib path: $LOADED_FEATURES $" |
|||
list loaded namespaces | Class.constants.select do |c| Module.const_get(c).class == Class end |
|||
inspect namespace | require 'uri' URI.constants URI.methods URI.class_variables |
|||
pretty-print |
require 'pp' d = {'lorem' => 1, 'ipsum' => [2, 3]} pp d |
|||
source line number and file name | __LINE__ __FILE__ |
|||
command line documentation | $ ri -c $ ri Math $ ri Math.atan2 |
|||
net and web | ||||
javascript | ruby | |||
get local hostname, dns lookup, reverse dns lookup | require 'socket' hostname = Socket.gethostname ip = Socket.getaddrinfo( Socket.gethostname, "echo")[0][3] host2 = Socket.gethostbyaddr(ip)[0] |
|||
http get |
require 'net/http' url = "www.google.com" r = Net::HTTP.start(url, 80) do |f| f.get("/") end if r.code == "200" s = r.body end |
|||
http post |
||||
serve working directory | $ ruby -rwebrick -e \ 'WEBrick::HTTPServer.new(:Port => 8000, '\ ':DocumentRoot => Dir.pwd).start' |
|||
absolute url from base and relative url |
require 'uri' URI.join("http://google.com", "analytics") |
|||
parse url | require 'uri' url = "http://google.com:80/foo?q=3#bar" up = URI(url) protocol = up.scheme hostname = up.host port = up.port path = up.path query_str = up.query fragment = up.fragment # Ruby 1.9; returns array of pairs: params = URI.decode_www_form(query_str) |
|||
url encode/decode |
require 'cgi' CGI::escape("lorem ipsum?") CGI::unescape("lorem+ipsum%3F") |
|||
html escape escape character data, escape attribute value, unescape html entities |
require 'cgi' s2 = CGI.escapeHTML('<>&"') s3 = CGI.unescapeHTML(s2) |
|||
base64 encode/decode | require 'base64' s = File.open("foo.png").read b64 = Base64.encode64(s) s2 = Base64.decode64(b64) |
|||
unit tests | ||||
node | ruby | |||
test class | // npm install -g nodeunit exports.testFoo = function(test) { test.ok(true, 'not true!.'); test.done(); } |
require 'test/unit' class TestFoo < Test::Unit::TestCase def test_01 assert(true, "not true!") end end |
||
run tests, run test method | $ nodeunit test_foo.js $ nodeunit -t testFoo test_foo.js |
$ ruby test_foo.rb $ ruby test_foo.rb -n test_01 |
||
equality assertion | var s = 'do re mi'; test.equals(s, 'do re mi'); |
s = "do re mi" assert_equal("do re mi", s) |
||
approximate assertion | x = 10.0 * (1.0 / 3.0) y = 10.0 / 3.0 # default for delta is 0.001 assert_in_delta(x, y, 0.1**6) |
|||
regex assertion | s = "lorem ipsum" assert_match(/lorem/, s) |
|||
exception assertion | assert_raises(ZeroDivisionError) do 1 / 0 end |
|||
mock method | # gem install mocha require 'mocha' foo = mock() foo.expects(:run).returns(7).with(13).once foo.run(13) |
|||
setup | exports.setUp = function(callback) { console.log('setting up...'); callback(); } |
# in class TestFoo: def setup puts "setting up" end |
||
teardown | exports.tearDown = function(callback) { console.log('tearing down...'); callback(); } |
# in class TestFoo: def teardown puts "tearing down" end |
||
debugging and profiling | ||||
javascript | ruby | |||
check syntax |
$ ruby -c foo.rb | |||
flags for stronger and strongest warnings | $ ruby -w foo.rb $ ruby -W2 foo.rb |
|||
lint | $ npm install jshint $ ./node_modules/jshint/bin/jshint foo.js |
$ sudo gem install rubocop $ rubocop -D foo.rb |
||
source cleanup | $ sudo gem install rubocop $ rubocop -D foo.rb |
|||
run debugger | $ sudo gem install ruby-debug $ rdebug foo.rb |
|||
debugger commands | h l n s b c w u down p q | |||
benchmark code | require 'benchmark' n = 1_000_000 i = 0 puts Benchmark.measure { n.times { i += 1 } } |
|||
profile code | $ sudo gem install ruby-prof $ ruby-prof foo.rb |
|||
java interoperation | ||||
javascript | ruby | |||
version |
JRuby 1.7 compatible with Ruby 1.9 |
|||
repl |
$ jirb | |||
interpreter |
$ jruby | |||
compiler |
$ jrubyc | |||
prologue |
none | |||
new |
rnd = java.util.Random.new | |||
method |
rnd.next_float | |||
import |
java_import java.util.Random rnd = Random.new |
|||
non-bundled java libraries |
require 'path/to/mycode.jar' | |||
shadowing avoidance |
module JavaIO include_package "java.io" end |
|||
convert native array to java array |
[1, 2, 3].to_java(Java::int) | |||
are java classes subclassable? |
yes | |||
are java classes open? |
yes | |||
____________________________________________ | ____________________________________________ |
File Handles
standard file handles
The names for standard input, standard output, and standard error.
read line from stdin
How to read a line from standard input.
The illustrated function reads the standard input stream until an end-of-line marker is found or the end of the stream is encountered. Only in the former case will the returned string be terminated by an end-of-line marker.
php:
fgets takes an optional second parameter to specify the maximum line length. If the length limit is encountered before a newline, the string returned will not be newline terminated.
ruby:
gets takes an optional parameter to specify the maximum line length. If the length limit is encountered before a newline, the string returned will not be newline terminated.
end-of-file behavior
What happens when attempting to read a line and the seek point is after the last newline or at the end of the file.
chomp
Remove a newline, carriage return, or carriage return newline pair from the end of a line if there is one.
php:
chop removes all trailing whitespace. It is an alias for rtrim.
python:
Python strings are immutable. rstrip returns a modified copy of the string. rstrip('\r\n') is not identical to chomp because it removes all contiguous carriage returns and newlines at the end of the string.
ruby:
chomp! modifies the string in place. chomp returns a modified copy.
write line to stdout
How to write a line to standard out. The line will be terminated by an operating system appropriate end of line marker.
python:
print appends a newline to the output. To suppress this behavior, put a trailing comma after the last argument. If given multiple arguments, print joins them with spaces.
In Python 2 print parses as a keyword and parentheses are not required:
print "Hello, World!"
ruby:
puts appends a newline to the output. print does not.
write formatted string to stdout
How to format variables and write them to standard out.
The function printf from the C standard library is a familiar example. It has a notation for format strings which uses percent signs %. Many other languages provide an implementation of printf.
open file for reading
How to open a file for reading.
ruby:
When File.open is given a block, the file is closed when the block terminates.
open file for writing
How to open a file for writing. If the file exists its contents will be overwritten.
set file handle encoding
How to open a file and specify the character encoding.
python:
The encoding of a file handle can be changed after it is opened:
fin.encoding = 'UTF-8'
If the encoding is set to UTF-8, but the file contains byte sequences that are not a possible UTF-8 encoding, Python will raise a UnicodeDecodeError.
open file for appending
How to open a file with the seek point at the end of the file. If the file exists its contents will be preserved.
close file
How to close a file.
close file implicitly
How to have a file closed when a block is exited.
python:
File handles are closed when the variable holding them is garbage collected, but there is no guarantee when or if a variable will be garbage collected.
ruby:
File handles are closed when the variable holding them is garbage collected, but there is no guarantee when or if a variable will be garbage collected.
i/o error
How I/O errors are treated.
encoding error
read line
How to read up to the next newline in a file.
iterate over file by line
How to iterate over a file line by line.
read file into array of strings
How to put the lines of a file into an array of strings.
read file into string
How to put the contents of a file into a single string.
write string
How to write a string to a file handle.
write line
How to write a line to a file handle. An operating system appropriate end-of-line marker is appended to the output.
php:
Newlines in strings are translated to the operating system appropriate line terminator unless the file handle was opened with a mode string that contained 'b'.
python:
When file handles are opened with the mode strings 'r', 'w', or 'a', the file handle is in text mode. In text mode the operating system line terminator is translated to '\n' when reading and '\n' is translated back to the operating system line terminator when writing. The standard file handles sys.stdin, sys.stdout, and sys.stderr are opened in text mode.
When file handles are opened with the mode strings 'rb', 'wb', or 'ab', the file handle is in binary mode and line terminator translation is not performed. The operating system line terminator is available in os.linesep.
flush file handle
How to flush a file handle that has been written to.
end-of-file test
How to test whether the seek point of a file handle is at the end of the file.
file handle position
How to get or set the file handle seek point.
The seek point is where the next read on the file handle will begin. The seek point is measured in bytes starting from zero.
open temporary file
How to get a file handle to a file that will be removed automatically sometime between when the file handle is closed and the interpreter exits.
The file is guaranteed not to have existed before it was opened.
The file handle is opened for both reading and writing so that the information written to the file can be recovered by seeking to the beginning of the file and reading from the file handle.
On POSIX operating systems it is possible to unlink a file after opening it. The file is removed from the directory but continues to exist as long as the file handle is open. This guarantees that no other process will be able to read or modify the file contents.
php:
Here is how to create a temporary file with a name:
$path = tempnam(sys_get_temp_dir(), "");
$f = fopen($path, "w+");
python:
To unlink a temporary file on open, use TemporaryFile instead of NamedTemporaryFile:
import tempfile
f = tempfile.TemporaryFile()
in memory file
How to create a file descriptor which writes to an in-memory buffer.
python:
StringIO also supports the standard methods for reading input. To use them the client must first seek to the beginning of the in-memory file:
f = StringIO()
f.write('lorem ipsum\n')
f.seek(0)
f.read()
Files
file exists test, file regular test
How to test whether a file exists; how to test whether a file is a regular file (i.e. not a directory, special device, or named pipe).
file size
How to get the file size in bytes.
is file readable, writable, executable
How to test whether a file is readable, writable, or executable.
python:
The flags can be or'ed to test for multiple permissions:
os.access('/etc/hosts', os.R_OK | os.W_OK | os.X_OK)
set file permissions
How to set the permissions on the file.
For Perl, Python, and Ruby, the mode argument is in the same format as the one used with the Unix chmod command. It uses bitmasking to get the various permissions which is why it is normally an octal literal.
The mode argument should not be provided as a string such as "0755". Python and Ruby will raise an exception if a string is provided. Perl will convert "0755" to 755 and not 0755 which is equal to 493 in decimal.
last modification time
How to get the last modification time of a file.
For a regular file, the last modification time is the most recent time that the contents were altered.
For a directory, the last modification time is the most recent time that a file in the directory was added, removed, or renamed.
copy file, remove file, rename file
How to copy a file; how to remove a file; how to rename a file.
create symlink, symlink test, readlink
How to create a symlink; how to test whether a file is a symlink; how to get the target of a symlink.
generate unused file name
How to generate an unused file name. The file is created to avoid a race condition with another process looking for an unused file name.
The file is not implicitly deleted.
File Formats
parse csv
How to parse a CSV file and iterate through the rows.
generate csv
How to generate a CSV file from an array of tuples.
parse json
How to decode a string of JSON.
JSON data consists of objects, arrays, and JSON values. Objects are dictionaries in which the keys are strings and the values are JSON values. Arrays contain JSON values. JSON values can be objects, arrays, strings, numbers, true, false, or null.
A JSON string is JSON data encoded using the corresponding literal notation used by JavaScript source code.
JSON strings are sequences of Unicode characters. The following backslash escape sequences are supported:
\" \\ \/ \b \f \n \r \t \uhhhh.
generate json
How to encode data as a JSON string.
parse yaml
How to parse a string of YAML.
YAML is sometimes used to serialize objects. Deserializing such YAML results in the constructor of the object being executed. The YAML decoding techniques illustrated here are "safe" in that they will not execute code, however.
generate yaml
How to generate a string of YAML.
parse xml
How to parse XML and extract nodes using XPath.
ruby:
Another way of handling an XPath expression which matches multiple nodes:
XPath.each(doc,"/a/b/c") do |node|
puts node.text
end
generate xml
How to build an XML document.
An XML document can be constructed by concatenating strings, but the techniques illustrated here guarantee the result to be well-formed XML.
parse html
How to parse an HTML document.
Directories
working directory
How to get and set the working directory.
build pathname
How to construct a pathname without hard coding the system file separator.
dirname and basename
How to extract the directory portion of a pathname; how to extract the non-directory portion of a pathname.
absolute pathname
How to get the absolute pathname for a pathname. If the pathname is relative, the working directory will be prepended.
In the examples provided, if /foo/bar is the working directory and .. is the relative path, then the return value is /foo.
iterate over directory by file
How to iterate through the files in a directory.
In PHP, Perl, and Ruby, the files representing the directory itself . and the parent directory .. are returned.
php:
The code in the example will stop if a filename which evaluates as FALSE is encountered. One such filename is "0". A safer way to iterate through the directory is:
if ($dir = opendir("/etc")) {
while (FALSE !== ($file = readdir($dir))) {
echo "$file\n";
}
closedir($dir);
}
python:
file() is the file handle constructor. file can be used as a local variable name but doing so hides the constructor. It can still be invoked by the synonym open(), however.
os.listdir() does not return the special files . and .. which represent the directory itself and the parent directory.
glob paths
How to iterate over files using a glob pattern.
Glob patterns employ these special characters:
* | matches zero or more characters, the first of which is not . and none of which is / |
? | matches one character |
[ ] | matches one character from the list inside the brackets |
\ | escapes one of the previous characters |
Use glob patterns instead of simple directory iteration when
- dot files, including the directory itself (.) and the parent directory (..), should be skipped
- a subset of the files in a directory, where the subset can be specified with a glob pattern, is desired
- files from multiple directories, where the directories can be specified with a glob pattern, are desired
- the full pathnames of the files are desired
php:
glob takes a second argument for flags. The flag GLOB_BRACE enables brace notation.
python:
glob.glob returns a list. glob.iglob accepts the same arguments and returns an iterator.
ruby:
Ruby globs support brace notation.
A brace expression matches any of the comma separated strings inside the braces.
Dir.glob("/{bin,etc,usr}/*").each do |path|
puts path
end
make directory
How to create a directory.
If needed, the examples will create more than one directory.
No error will result if a directory at the pathname already exists. An exception will be raised if the pathname is occupied by a regular file, however.
recursive copy
How to perform a recursive copy. If the source is a directory, then the directory and all its contents will be copied.
remove empty directory
How to remove an empty directory. The operation will fail if the directory is not empty.
remove directory and contents
How to remove a directory and all its contents.
directory test
How to determine if a pathname is a directory.
generate unused directory
How to generate an unused directory. The directory is created to avoid a race condition with another process looking for an unused directory.
The directory is not implicitly deleted.
ruby:
When Dir.mktmpdir is provided with a block the directory is deleted after the block finishes executing:
require 'tmpdir'
require 'fileutils'
Dir.mktmpdir("/tmp/foo") do |path|
puts path
FileUtils.cp("/etc/hosts", "#{path}/hosts")
end
system temporary file directory
The name of the system provided directory for temporary files.
On Linux the directory is often /tmp, and the operating system is often configured to delete the contents of /tmp at boot.
Processes and Environment
command line arguments
How to access arguments provided at the command line when the script was run; how to get the name of the script.
environment variable
How to get and set an environment variable. If an environment variable is set the new value is inherited by child processes.
php:
putenv returns a boolean indicating success. The command can fail because when PHP is running in safe mode only some environment variables are writable.
get pid, parent pid
How to get the process id of the interpreter process; how to get the id of the parent process.
ruby:
The process pid is also available in the global variable $$.
user id and name
How to get the user id of the interpreter process; how to get the username associated with the user id.
When writing a setuid application on Unix, there is a distinction between the real user id and the effective user id. The code examples return the real user id.
The process may be able to determine the username by inspecting environment variables. A POSIX system is required to set the environment variable LOGNAME at login. Unix systems often set USER at login, and Windows systems set %USERNAME%. There is nothing to prevent the user from altering any of these environment variables after login. The methods illustrated in the examples are thus more secure.
python:
How to get the effective user id:
os.geteuid()
ruby:
How to get the effective user id:
Process.euid
exit
python:
It is possible to register code to be executed upon exit:
import atexit
atexit.register(print, "goodbye")
It is possible to terminate a script without executing registered exit code by calling os._exit.
ruby:
It is possible to register code to be executed upon exit:
at_exit { puts "goodbye" }
The script can be terminated without executing registered exit code by calling exit!.
set signal handler
How to register a signal handling function.
executable test
How to test whether a file is executable.
external command
How to execute an external command.
shell-escaped external command
How to prevent shell injection.
command substitution
How to invoke an external command and read its output into a variable.
The use of backticks for this operation goes back to the Bourne shell (1977).
python:
A more concise solution is:
file = os.popen('ls -l /tmp').read()
os.popen was marked as deprecated in Python 2.6 but it is still available in Python 2.7 and Python 3.2.
ruby:
%x can be used with any delimiter. If the opening delimiter is (, [, or {, the closing delimiter must be ), ], or }.
Option Parsing
command line options
How to process command line options.
We describe the style used by getopt_long from the GNU C library. The characteristics of this style are:
- Options can be short or long. Short options are a single character preceded by a hyphen. Long options are a word preceded by two hyphens.
- A double hyphen by itself can be used to terminate option processing. Arguments after the double hyphen are treated as positional arguments and can start with a hyphen.
- Options can be declared to be with or without argument. Options without argument are used to set a boolean value to true.
- Short options without argument can share a hyphen.
- Long options can be separated from their argument by a space or an equals sign (=). Short options can be separated from their argument by nothing, a space, or an equals sign (=).
The option processing function should identify the positional arguments. These are the command line arguments which are not options, option arguments, or the double hyphen used to terminate option processing. getopt_long permits options to occur after positional arguments.
python:
The type of an argument can be specified using the named parameter type:
parser.add_argument('--count', '-c', dest='count', type=int)
parser.add_argument('--ratio', '-r', dest='ratio', type=float)
If the argument cannot be converted to the type, the script prints out a usage statement and exits with a non-zero value.
The default value is None, but this can be changed using the named parameter default:
parser.add_argument('--file', '-f', dest='file', default='tmpfile')
parser.add_argument('--count', '-c', dest='count', type=int, default=1)
parser.add_argument('--ratio', '-r', dest='ratio', type=float, default=0.5)
Libraries and Namespaces
Terminology used in this sheet:
- library: code in its own file that can be included, loaded, or linked by client code.
- client: code which calls code in a separate file.
- top-level file or top-level script: the file containing the code in the program which executes first.
- load: to add definitions in a file to the text of a running process.
- namespace: a set of names that can be imported as a unit.
- import: to add definitions defined elsewhere to a scope.
- unqualified import: to add definitions to a scope using the same identifiers as where they are defined.
- qualified import: to add definitions to a scope. The identifiers in the scope are derived from the original identifiers in a formulaic manner. Usually the name of the namespace is added as a prefix.
- label: one of the parts of a qualified identifier.
- alias import: to add a definition to a scope under an identifier which is specified in the import statement.
- package: one or more libraries that can be installed by a package manager.
load library
Execute the specified file. Normally this is used on a file which only contains declarations at the top level.
php:
include_once behaves like require_once except that it is not fatal if an error is encountered executing the library.
load library in subdirectory
How to load a library in a subdirectory of the library path.
hot patch
How to reload a library. Altered definitions in the library will replace previous versions of the definition.
php:
Also include.
load error
How errors which are encountered while loading libraries are handled.
main routine in library
How to put code in a library which executes only when the file is run as a top-level script.
library path
The library path is a list of directory paths which are searched when loading libraries.
library path environment variable
How to augment the library path by setting an environment variable before invoking the interpreter.
library path command line option
How to augment the library path by providing a command line option when invoking the interpreter.
simple global identifiers
multiple label identifiers
label separator
The punctuation used to separate the labels in the full name of a subnamespace.
root namespace definition
namespace declaration
How to declare a section of code as belonging to a namespace.
subnamespace declaration
How to declare a section of code as belonging to a subnamespace.
import namespace
import subnamespace
import all definitions in namespace
How to import all the definitions in a namespace.
import definitions
How to import specific definitions from a namespace.
list installed packages, install a package
How to show the installed 3rd party packages, and how to install a new 3rd party package.
python:
Two ways to list the installed modules and the modules in the standard library:
$ pydoc modules
$ python
>>> help('modules')
Most 3rd party Python code is packaged using distutils, which is in the Python standard library. The code is placed in a directory with a setup.py file. The code is installed by running the Python interpreter on setup.py:
$ sudo python setup.py install
package specification format
The format of the file used to specify a package.
python:
How to create a Python package using distutils. Suppose that the file foo.py contains the following code:
def add(x, y):
return x+y
In the same directory as foo.py create setup.py with the following contents:
#!/usr/bin/env python
from distutils.core import setup
setup(name='foo',
version='1.0',
py_modules=['foo'],
)
Create a tarball of the directory for distribution:
$ tar cf foo-1.0.tar foo
$ gzip foo-1.0.tar
To install from the tarball, perform the following:
$ tar xf foo-1.0.tar.gz
$ cd foo
$ sudo python setup.py install
If you want people to be able to install the package with pip, upload the tarball to the Python Package Index.
ruby:
For an example of how to create a gem, create a directory called foo. Inside it create a file called lib/foo.rb which contains:
def add(x, y)
x + y
end
Then create a file called foo.gemspec containing:
spec = Gem::Specification.new do |s|
s.name = 'foo'
s.authors = 'Joe Foo'
s.version = '1.0'
s.summary = 'a gem'
s.files = Dir['lib/*.rb']
end
To create the gem, run this command:
$ gem build foo.gemspec
A file called foo-1.0.gem is created. To install foo.rb run this command:
$ gem install foo-1.0.gem
Objects
An object is a set of functions called methods which have shared access to the object's instance variables. An object's methods and instance variables are collectively called its members. If a member of an object can be accessed or invoked by code which is not in a member of the object, it is public. Otherwise it is private.
A class is a set of objects which have the same method definitions. The objects in the set are instances of the class. Functions defined in the class namespace which are not object methods are called class methods. A class method which returns instances of the class is called a factory method. If there is a class method which is responsible for creating all instances, it is called a constructor. The existence of a constructor does not preclude the existence of other factory methods since they can invoke the constructor and return its return value.
A class may contain class variables. These are global variables defined in the namespace of the class.
A method which returns the value of an instance variable is called a getter. A method which sets the value of an instance variable is called a setter. Getters and setters may seem pointless at first blush as one could make the underlying instance variable public instead. In practice getters and setters make code more maintainable. Consistent use of getters and setters conforms with the Uniform Access Principle and makes the API presented by an object to its clients simpler.
Perl instance variables are private, so Perl enforces a good practice at the cost of requiring boilerplate code for defining getters and setters.
Python instance variables are public. Although this permits concise class definitions, a maintainer of a Python class may find it difficult to replace an instance variable with a derived value when clients are accessing the instance variable directly. With an old-style Python class, the maintainer can't make the change without breaking the client code. With a new-style class the maintainer can replace an instance variable with a getter and setter and mark them with the @property decorator.
Ruby, like Perl, has private instance variables. It has the directives attr_reader, attr_writer, and attr_accessor for defining getters and setters. Ruby classes are objects and in particular they are instances of the Module class. The directives attr_reader, attr_writer, and attr_accessor are instance methods defined in the Module class which execute when the class block executes.
define class
php:
Properties (i.e. instance variables) must be declared public, protected, or private. Methods can optionally be declared public, protected, or private. Methods without a visibility modifier are public.
python:
As of Python 2.2, classes are of two types: new-style classes and old-style classes. The class type is determined by the type of class(es) the class inherits from. If no superclasses are specified, then the class is old-style. As of Python 3.0, all classes are new-style.
New-style classes have these features which old-style classes don't:
- universal base class called object.
- descriptors and properties. Also the __getattribute__ method for intercepting all attribute access.
- change in how the diamond problem is handled. If a class inherits from multiple parents which in turn inherit from a common grandparent, then when checking for an attribute or method, all parents will be checked before the grandparent.
create object
How to create an object.
get and set attribute
How to get and set an attribute.
python:
Defining explicit setters and getters in Python is considered poor style. Extra logic can be achieved without disrupting the clients of the class by creating a property:
def getValue(self):
print("getValue called")
return self.__dict__['value']
def setValue(self,v):
print("setValue called")
self.__dict__['value'] = v
value = property(fget=getValue, fset = setValue)
instance variable visibility
How instance variable access works.
define method
How to define a method.
invoke method
How to invoke a method.
destructor
How to define a destructor.
python:
A Python destructor is not guaranteed to be called when all references to an object go out of scope, but apparently this is how the CPython implementations work.
ruby:
Ruby lacks a destructor. It is possible to register a block to be executed before the memory for an object is released by the garbage collector. A ruby interpreter may exit without releasing memory for objects that have gone out of scope and in this case the finalizer will not get called. Furthermore, if the finalizer block holds on to a reference to the object, it will prevent the garbage collector from freeing the object.
method missing
How to handle when a caller invokes an undefined method.
php:
Define the method __callStatic to handle calls to undefined class methods.
python:
__getattr__ is invoked when an attribute (instance variable or method) is missing. By contrast, __getattribute__, which is only available in new-style classes, is always invoked, and can be used to intercept access to attributes that exist. __setattr__ and __delattr__ are invoked whenever an attribute is set or deleted, whether or not the attribute already exists. The del statement is used to delete an attribute.
ruby:
Define the method self.method_missing to handle calls to undefined class methods.
define class method
invoke class method
How to invoke a class method.
define class variable
get and set class variable
method alias
How to create an alias for a method.
ruby:
Ruby provides the keyword alias and the method alias_method in the class Module. Inside a class body they behave identically. When called from inside a method, alias has no effect but alias_method works as expected. Hence some recommend always using alias_method.
Inheritance and Polymorphism
A subclass is a class whose objects contain all of the methods from another class called the superclass. Objects in the subclass should in principle be usable anywhere objects in the superclass can be used. The subclass may have extra methods which are not found in the superclass. Moreover it may replace method definitions in the superclass with its own definitions provided the signature remains the same. This is called overriding.
It is sometimes useful to define a superclass which is never instantiated. Such a class is called an abstract class. An abstract class is a way to share code between two or more subclasses or to define the API that two or more subclasses should implement.
inheritance
How to use inheritance.
mixin
operator overloading
How to define the behavior of the binary operators.
Reflection
object id
How to get an identifier for an object or a value.
inspect type
php:
The PHP manual says that the strings returned by gettype are subject to change and advises using the following predicates instead:
is_null
is_bool
is_numeric
is_int
is_float
is_string
is_array
is_object
is_resource
All possible return values of gettype are listed.
basic types
inspect class
How to get the class of an object.
javascript:
inspect class hierarchy
has method?
python:
hasattr(o,'reverse') will return True if there is an instance variable named 'reverse'.
message passing
javascript:
The following works in Firefox:
var o = {}
o.__noSuchMethod__ = function(name) { alert('you called ' + name) }
o.whoopsie()
eval
How to interpret a string as code and return its value.
php:
The value of the string is the value of the return statement that terminates execution. If execution falls off the end of the string without encountering a return statement, the eval evaluates as NULL.
python:
The argument of eval must be an expression or a SyntaxError is raised. The Python version of the mini-REPL is thus considerably less powerful than the versions for the other languages. It cannot define a function or even create a variable via assignment.
list object methods
list object attributes
python:
dir(o) returns methods and instance variables.
pretty print
How to display the contents of a data structure for debugging purposes.
source line number and file name
How to get the current line number and file name of the source code.
command line documentation
How to get documentation from the command line.
ruby:
Searching for Math.atan2 will return either class method or instance method documentation. If there is documentation for both one can be specific with the following notation:
$ ri Math::atan2
$ ri Math#atan2
Net and Web
get local hostname, dns lookup, reverse dns lookup
How to get the hostname and the ip address of the local machine without connecting to a socket.
The operating system should provide a method for determining the hostname. Linux provides the uname system call.
A DNS lookup can be performed to determine the IP address for the local machine. This may fail if the DNS server is unaware of the local machine or if the DNS server has incorrect information about the local host.
A reverse DNS lookup can be performed to find the hostname associated with an IP address. This may fail for the same reasons a forward DNS lookup might fail.
http get
How to make an HTTP GET request and read the response into a string.
http post
serve working directory
A command line invocation to start a single process web server which serves the working directory at http://localhost:8000.
$ sudo cpan -i IO::All
$ perl -MIO::All -e 'io(":8000")->fork->accept->(sub { $_[0] < io(-x $1 ? "./$1 |" : $1) if /^GET \/(.*) / })'
absolute url
How to construct an absolute URL from a base URL and a relative URL as documented in RFC 1808.
When constructing the absolute URL, the rightmost path component of the base URL is removed unless it ends with a slash /. The query string and fragment of the base URL are always removed.
If the relative URL starts with a slash / then the entire path of the base URL is removed.
If the relative URL starts with one or more occurrences of ../ then one or more path components are removed from the base URL.
The base URL and the relative URL will be joined by a single slash / in the absolute URL.
php:
Here is a PHP function which computes absolute urls.
parse url
How to extract the protocol, host, port, path, query string, and fragment from a URL. How to extract the parameters from the query string.
python:
urlparse can also be used to parse FTP URLs:
up = urlparse.urlparse('ftp://foo:bar@google.com/baz;type=binary')
# 'foo'
up.username
# 'bar'
up.password
# 'type=binary'
up.params
ruby:
How to parse an FTP URL:
up = URI('ftp://foo:bar@google.com/baz;type=binary')
# "foo"
up.user
# "bar"
up.password
# "binary"
up.typecode
url encode/decode
How to URL encode and URL unencode a string.
URL encoding, also called percent encoding, is described in RFC 3986. It replaces all characters except for the letters, digits, and a few punctuation marks with a percent sign followed by their two digit hex encoding. The characters which are not escaped are:
A-Z a-z 0-9 - _ . ~
URL encoding can be used to encode UTF-8, in which case each byte of a UTF-8 character is encoded separately.
When form data is sent from a browser to a server via an HTTP GET or an HTTP POST, the data is percent encoded but spaces are replaced by plus signs + instead of %20. The MIME type for form data is application/x-www-form-urlencoded.
python:
In Python 3 the functions quote_plus, unquote_plus, quote, and unquote moved from urllib to urllib.parse.
urllib.quote replaces a space character with %20.
urllib.unquote does not replace + with a space character.
html escape
How to escape special characters in HTML character data; how to escape special characters in HTML attribute values; how to unescape HTML entities.
In character data, such as what occurs in between a start and end tag, the characters <, >, and & must be replaced by &lt;, &gt;, and &amp;.
Attribute values in HTML tags must be quoted if they contain a space or any of the characters "'`=<>. Attribute values can be double quoted or single quoted. Double quotes and single quotes can be escaped by using the HTML entities &quot; and &#39;. It is not necessary to escape the characters <, >, and & inside quoted attribute values.
php:
The flag ENT_NOQUOTES to the function htmlspecialchars causes double quotes " not to be escaped.
The flag ENT_QUOTES causes single quotes ' to be escaped.
base64 encode/decode
How to encode binary data in ASCII using the Base64 encoding scheme.
A popular Base64 encoding is the one defined by RFC 2045 for MIME. Every 3 bytes of input is mapped to 4 of these characters: [A-Za-z0-9/+].
If the input does not consist of a multiple of three bytes, then the output is padded with one or two equals signs: =.
Whitespace can be inserted freely into Base64 output; this is necessary to support transmission by email. When converting Base64 back to binary whitespace is ignored.
Unit Tests
test class
How to define a test class and make a truth assertion.
The argument of a truth assertion is typically an expression. It is a good practice to include a failure message as a second argument which prints out variables in the expression.
run tests; run test method
How to run all the tests in a test class; how to run a single test from the test class.
equality assertion
How to test for equality.
python:
Note that assertEquals does not print the values of its first two arguments when the assertion fails. A third argument can be used to provide a more informative failure message.
approximate assertion
How to assert that two floating point numbers are approximately equal.
regex assertion
How to test that a string matches a regex.
exception assertion
How to test whether an exception is raised.
mock method
How to create a mock method.
A mock method is used when calling the real method from a unit test would be undesirable. The method that is mocked is not in the code that is being tested, but rather a library which is used by that code. Mock methods can raise exceptions if the test fails to invoke them or if the wrong arguments are provided.
python:
assert_called_once_with takes the same number of arguments as the method being mocked.
If the mock method was called multiple times, the method assert_called_with can be used in place of assert_called_once_with to make an assertion about the arguments that were used in the most recent call.
A mock method which raises an exception:
foo = Foo()
foo.run = mock.Mock(side_effect=KeyError('foo'))
with self.assertRaises(KeyError):
foo.run(13)
foo.run.assert_called_with(13)
ruby:
The with method takes the same number of arguments as the method being mocked.
Other methods are available for use in the chain which defines the assertion. The once method can be replaced by never or twice. If there is uncertainty about how often the method will be called one can use at_least_once, at_least(m), at_most_once, at_most(n) to set lower or upper bounds. times(m..n) takes a range to set both the lower and upper bound.
A mock method which raises an exception:
foo = mock()
foo.expects(:run).
raises(exception = RuntimeError, message = 'bam!').
with(13).
once
assert_raises(RuntimeError) do
foo.run(13)
end
There is also a method called yields which can be used in the chain which defines the assertion. It makes the mock method yield to a block. It takes as arguments the arguments it passes to the block.
setup
How to define a setup method which gets called before every test.
teardown
How to define a cleanup method which gets called after every test.
Debugging and Profiling
check syntax
How to check the syntax of code without executing it.
flags for stronger and strongest warnings
Flags to increase the warnings issued by the interpreter.
python:
The -t flag warns about inconsistent use of tabs in the source code. The -3 flag is a Python 2.X option which warns about syntax which is no longer valid in Python 3.X.
lint
A lint tool.
source cleanup
A tool which detects or removes semantically insignificant variation in the source code.
run debugger
How to run a script under the debugger.
debugger commands
A selection of commands available when running the debugger. The gdb commands are provided for comparison.
cmd | perl -d | gdb |
---|---|---|
help | h | h |
list | l [first, last] | l [first, last] |
next statement | n | n |
step into function | s | s |
set breakpoint | b | b [file:]line |
list breakpoints | L | i b |
delete breakpoint | B num | d num |
continue | c | c |
show backtrace | T | bt |
move up stack | u | |
move down stack | do | |
print expression | p expr | p expr |
(re)run | R | r [arg1[, arg2 …]] |
quit debugger | q | q |
benchmark code
How to run a snippet of code repeatedly and get the user, system, and total wall clock time.
profile code
How to run the interpreter on a script and get the number of calls and total execution time for each function or method.
Java Interoperation
version
Version of the scripting language JVM implementation used in this reference sheet.
repl
Command line name of the repl.
interpreter
Command line name of the interpreter.
compiler
Command line name of the tool which compiles source to java byte code.
prologue
Code necessary to make java code accessible.
new
How to create a java object.
method
How to invoke a java method.
import
How to import names into the current namespace.
import non-bundled java library
How to import a non-bundled Java library
shadowing avoidance
How to import Java names which are the same as native names.
convert native array to java array
How to convert a native array to a Java array.
are java classes subclassable?
Can a Java class be subclassed?
are java classes open?
Can a Java class be monkey patched?