Easy way to add frozen_string_literal magic string to your ruby files

1
2
3
4
5
comm -23 \
<(git ls-files|sort) \
<(git grep -l 'frozen_string_literal'|sort) \
| grep -E '\.rb$' \
| xargs -n1 sed -i '1s/^/# frozen_string_literal: true\n\n/'

The code is pretty self explanatory, we get a list of all the files in our repo and then remove the ones which already have the magic string and then filter it to just the ruby files and finally adding the magic string to all the files.

Solution to Advent of Code 2018 Day 3 in Elixir

Solving Day 3 turned out to be a bit more challenging for me as I don’t usually do these kind of exercises, Nevertheless it was fun!

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209

defmodule Rect do
defstruct [:id, :left, :top, :width, :height]

alias __MODULE__

def parse(spec) do
[id, dimensions] = String.split(spec, "@", trim: true)
[coords, size] = String.split(dimensions, ":", trim: true)

[left, top] = String.split(coords, ",", trim: true) |> Enum.map(&parse_number/1)

[width, height] = String.split(size, "x", trim: true) |> Enum.map(&parse_number/1)

%Rect{
id: String.trim(id),
left: left,
top: top,
width: width,
height: height
}
end

defp parse_number(str), do: str |> String.trim() |> String.to_integer()

def order_horizontal(r1, r2) do
if r1.left < r2.left do
{r1, r2}
else
{r2, r1}
end
end

def order_vertical(r1, r2) do
if r1.top < r2.top do
{r1, r2}
else
{r2, r1}
end
end

def squares(%Rect{width: w, height: h}) when w <= 0 or h <= 0, do: []

def squares(%Rect{} = r) do
for x <- r.left..(r.left + r.width - 1), y <- r.top..(r.top + r.height - 1), do: {x, y}
end
end

defmodule Overlap do
def area(spec) when is_binary(spec) do
spec
|> String.split("\n", trim: true)
|> Enum.map(&Rect.parse/1)
|> area
end

def area(rects, prev_squares \\ [])

def area([h | tl], prev_squares) do
squares =
tl
|> Enum.map(fn x -> overlap(h, x) |> Rect.squares() end)

area(tl, [squares | prev_squares])
end

def area([], squares),
do:
squares
|> List.flatten()
|> Enum.uniq()
|> Enum.count()

def find_non_overlap(spec) when is_binary(spec) do
rects =
spec
|> String.split("\n", trim: true)
|> Enum.map(&Rect.parse/1)

find_non_overlap(rects, rects)
end

def find_non_overlap([h | tl], all_rects) do
if all_rects
|> Enum.filter(fn x -> x.id != h.id end)
|> Enum.all?(fn x ->
o = overlap(h, x)
o.width <= 0 || o.height <= 0
end) do
h
else
find_non_overlap(tl, all_rects)
end
end

def find_non_overlap([], _), do: raise("Not found")

def overlap(%Rect{} = r1, %Rect{} = r2) do
{l, r} = Rect.order_horizontal(r1, r2)

width = min(l.left + l.width - r.left, r.width)

{t, b} = Rect.order_vertical(r1, r2)

height = min(t.top + t.height - b.top, b.height)

%Rect{
left: r.left,
top: b.top,
width: width,
height: height
}
end
end

defmodule OverlapTest do
use ExUnit.Case

import Overlap

test "greets the world" do
assert area("""
# 1 @ 1,3: 4x4
# 2 @ 3,1: 4x4
# 3 @ 5,5: 2x2
""") == 4

assert area("""
# 1 @ 1,3: 4x4
# 2 @ 3,1: 4x4
# 3 @ 1,3: 4x4
""") == 16

assert File.read!("input.txt") |> area == 0
end

test "overlap between 2 rects" do
assert overlap(
%Rect{id: "# 1", left: 1, top: 3, width: 4, height: 8},
%Rect{id: "# 2", left: 3, top: 1, width: 4, height: 4}
) == %Rect{id: nil, left: 3, top: 3, width: 2, height: 2}

assert overlap(
%Rect{id: "# 1", left: 1, top: 3, width: 4, height: 4},
%Rect{id: "# 3", left: 5, top: 5, width: 2, height: 2}
) == %Rect{height: 2, id: nil, left: 5, top: 5, width: 0}
end

test "find_non_overlap" do
assert find_non_overlap("""
# 1 @ 1,3: 4x4
# 2 @ 3,1: 4x4
# 3 @ 5,5: 2x2
""").id == "# 3"

assert File.read!("input.txt") |> find_non_overlap == 0
end
end

defmodule RectTest do
use ExUnit.Case

test "parse" do
assert Rect.parse("# 1 @ 1,3: 4x3") == %Rect{id: "# 1", left: 1, top: 3, width: 4, height: 3}
end

test "order_horizontal" do
{%{id: "# 1"}, %{id: "# 2"}} =
Rect.order_horizontal(
%Rect{id: "# 1", left: 1, top: 3, width: 4, height: 3},
%Rect{id: "# 2", left: 3, top: 3, width: 4, height: 3}
)

{%{id: "# 4"}, %{id: "# 3"}} =
Rect.order_horizontal(
%Rect{id: "# 3", left: 10, top: 3, width: 4, height: 3},
%Rect{id: "# 4", left: 3, top: 3, width: 4, height: 3}
)
end

test "order_vertical" do
{%{id: "# 1"}, %{id: "# 2"}} =
Rect.order_vertical(
%Rect{id: "# 1", left: 1, top: 1, width: 4, height: 1},
%Rect{id: "# 2", left: 3, top: 3, width: 4, height: 3}
)

{%{id: "# 4"}, %{id: "# 3"}} =
Rect.order_vertical(
%Rect{id: "# 3", left: 10, top: 10, width: 4, height: 10},
%Rect{id: "# 4", left: 3, top: 3, width: 4, height: 3}
)
end

test "squares" do
assert Rect.squares(%Rect{id: "# 1", left: 1, top: 3, width: 2, height: 2}) == [
{1, 3},
{1, 4},
{2, 3},
{2, 4}
]

assert Rect.squares(%Rect{id: "# 1", left: 1, top: 3, width: 0, height: 0}) == []
assert Rect.squares(%Rect{id: "# 1", left: 1, top: 3, width: 0, height: 4}) == []
assert Rect.squares(%Rect{id: "# 1", left: 1, top: 3, width: 4, height: 0}) == []
assert Rect.squares(%Rect{id: "# 1", left: 1, top: 3, width: 4, height: -4}) == []
assert Rect.squares(%Rect{id: "# 1", left: 1, top: 3, width: -4, height: 4}) == []
end
end

Solutions to Advent of Code 2018 Day 2 in Elixir

Day 2’s problem was a bit tricky for Elixir. Check out the commented code to see how I tackled it:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#! /usr/bin/env elixir

defmodule Day2 do
def checksum(ids) do
{two_count, three_count} =
ids
|> String.split("\n", trim: true)
|> Enum.reduce({0, 0}, fn id, {two_count, three_count} ->
char_counts =
id
|> String.graphemes()
|> Enum.group_by(& &1)
|> Enum.map(fn {_c, vals} -> Enum.count(vals) end)
|> Enum.uniq()

{incr_if_count(char_counts, two_count, 2), incr_if_count(char_counts, three_count, 3)}
end)

two_count * three_count
end

defp incr_if_count(counts, prev_count, matcher) do
if matcher in counts do
prev_count + 1
else
prev_count
end
end

def find_common_boxes(ids) do
ids =
ids
|> String.split("\n", trim: true)
|> Enum.map(&String.graphemes/1)

{lhs, rhs} = find_similar(ids)


intersection(lhs, rhs)
end

defp intersection(lhs, rhs) do
Enum.zip(lhs, rhs)
|> Enum.map(fn {l, l} -> l; _ -> nil end)
|> Enum.filter(& &1)
|> Enum.join
end

defp find_similar([lhs_id | ids]) do
matching_id =
Enum.find(ids, fn rhs_id ->
diff_count(lhs_id, rhs_id) == 1
end)

if matching_id do
{lhs_id, matching_id}
else
find_similar(ids)
end
end

defp diff_count(lhs, rhs) do
Enum.zip(lhs, rhs)
|> Enum.map(fn
{x, x} -> 0
_ -> 1
end)
|> Enum.sum()
end
end

ExUnit.start()

defmodule Day2Test do
use ExUnit.Case

import Day2

test "#checksum" do
assert checksum("""
abcdef
bababc
abbcde
abcccd
aabcdd
abcdee
ababab
""") == 12

assert File.read!("input.txt") |> checksum == 4693
end

test "#find_common_boxes" do
assert find_common_boxes("""
abcde
fghij
klmno
pqrst
fguij
axcye
wvxyz
""") == "fgij"

assert File.read!("input.txt") |> find_common_boxes == "pebjqsalrdnckzfihvtxysomg"
end
end

Solutions to Advent of Code 2018 Day 1 in Elixir and Ruby

Day 1 had a simple challenge, compute the sum of a few numbers and find out when a prev frequency shows up again.

Part 1

Elixir

The elixir version is pretty straightforward, we create a stream using the standard input which creates an entry per line, transform it into a number and compute the sum:

1
2
3
4
5
6
7
8
9
10
11
12
13
#!/usr/bin/env elixir
# run it as follows:
# $ ./elixir.exs < input.txt
# 484

# read each line
IO.stream(:stdio, :line)
|> Stream.map(fn str ->
{n, ""} = str |> String.trim() |> Integer.parse()
n
end)
|> Enum.sum()
|> IO.puts()

Ruby

The ruby version is similar to the elixir version

1
2
3
4
5
6
7
8
#!/usr/bin/env ruby
# run it as follows:
# $ ./ruby.rb < input.txt
# 484

puts ARGF.each_line
.map(&:to_i)
.sum

Part 2

Elixir

This was a bit tricky as you may have to repeat the input multiple times to get the answer. To do this in elixir, we use simple recursion if we aren’t able to find a repeating frequency in the current iteration. Also, note that we are using a map to store previous frequencies, I have seen folks use a list which cause a huge impact on performance as lists don’t perform well for lookups whereas maps really shine in these scenarios.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
defmodule ChronalCalibration do

def part_2(input, start_freq \\ 0, prev_freqs \\ %{0 => true}) do
res =
input
|> String.split("\n")
|> Enum.reduce_while({start_freq, prev_freqs}, fn x, {freq, prev_freqs} ->
{i, ""} = Integer.parse(x)
freq = i + freq

if prev_freqs[freq] do
{:halt, {:succ, freq}}
else
{:cont, {freq, Map.put(prev_freqs, freq, true)}}
end
end)

case res do
{:succ, freq} -> freq
{freq, prev_freqs} -> part_2(input, freq, prev_freqs)
end
end

end

IO.puts File.read!("input.txt") |> ChronalCalibration.part_2

Ruby

The Ruby implementation is similar to the Elixir implementation.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
def repeating_freq
input = File.read("input.txt")
prev_freqs = { 0 => true }
freq = 0

loop do
input
.lines
.each do |curr|
freq += curr.to_i
return freq if prev_freqs[freq]
prev_freqs[freq] = true
end
end
end
puts repeating_freq()

How to get an MD5 hash of your request using a Plug and Phoenix

In a recent project, I had to implement idempotent API endpoints in a Phoenix application. And as a part of this, I had to store the MD5 hash of each request body along with its idempotency key and other request data. And the latest Plug makes this easier thanks to this PR.

So, let us take a step back and see what is required. We want to plug into the request pipeline, read the body, compute its md5 hash and store it in a private variable on the connection. Before Plug 1.5.1, if you wanted to read the requested body you had to duplicate your request parsers as they directly read from the connection using Plug.Conn.read_body which read the content, parsed it and discarded it. If you ended up putting your plug before the parsers and read the body, the JSON parser would fail to see any request data as the request would already have been read.

To work around this limitation a new option called body_reader was added to the Plug.Parsers plug. This allows you to get in the middle of the parser and the connection and have custom code which can read the request body, cache it and hand over the request body to the parsers which ends up in a clean implementation. So, let us take a look at the code that is required to add MD5 hashing to our requests.

The first thing we need is a custom request body reader which can be used by our parsers. From the example in the Plug documentation, it can be as simple as the code below:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
defmodule BodyReader do

def read_body(conn, opts) do
# read the body from the connection
{:ok, body, conn} = Plug.Conn.read_body(conn, opts)

# compute our md5 hash
md5sum = :crypto.hash(:md5, body) |> Base.encode16()

# shove the md5sum into a private key on the connection
conn = Plug.Conn.put_private(conn, :md5sum, md5sum)

{:ok, body, conn}
end

end

The above module has a read_body function which takes a connection, reads the request body, computes its md5 hash and shoves it into a private key on the connection and returns the body and connection to the parsers to pass them along.

Once we have this custom reader, we just need to configure our parsers to use this by changing our endpoint to have the following code:

1
2
3
4
5
plug Plug.Parsers,
parsers: [:urlencoded, :multipart, :json],
pass: ["*/*"],
body_reader: {BodyReader, :read_body, []},
json_decoder: Phoenix.json_library()

Once you set this up your actions can access the md5sum of a request using conn.private[:md5sum]. And that is all you need to compute the MD5 sum of your requests.

The same technique can be used to authenticate webhook requests from GitHub, Dropbox and other services which sign their requests with an HMAC key.

7 ways of doing Fizz Buzz in Elixir and other clickbaity stuff

Every developer “worth their salt” knows how to implement FizzBuzz, It is a program which prints the following:

1
2
3
4
5
6
7
8
9
10
11
defmodule FizzBuzz do
def run(count) do
Enum.map(1..20, &fizzbuzz/1)
end

def fizzbuzz(n) do
# ...
end
end
iex> FizzBuzz.run(16)
# => 1, 2, Fizz, 4, Buzz, Fizz, 7, 8, Fizz, Buzz, 11, Fizz, 13, 14, FizzBuzz, 16

Let us see a few possible ways to do this in Elixir!

1. Use if

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
def fb_if(n) do
if rem(n, 3) == 0 and rem(n, 5) == 0 do
"FizzBuzz"
else
if rem(n, 3) == 0 do
"Fizz"
else
if rem(n, 5) == 0 do
"Buzz"
else
n
end
end
end
end

That is some real ugly code, it is exacerbated by the fact that Elixir doesn’t allow early returns.

2. Use cond

1
2
3
4
5
6
7
8
def fb_cond1(n) do
cond do
rem(n, 3) == 0 and rem(n, 5) == 0 -> "FizzBuzz"
rem(n, 3) == 0 -> "Fizz"
rem(n, 5) == 0 -> "Buzz"
true -> n
end
end

This is a huge improvement, I really think this captures the problem the solution in a very readable way.

3. Use cond with better predicates

1
2
3
4
5
6
7
8
9
10
11
def divisible_by_3?(n), do: rem(n, 3) == 0
def divisible_by_5?(n), do: rem(n, 5) == 0

def fb_cond2(n) do
cond do
divisible_by_3?(n) and divisible_by_5?(n) -> "FizzBuzz"
divisible_by_3?(n) -> "Fizz"
divisible_by_5?(n) -> "Buzz"
true -> n
end
end

Using separate predicates improves the readability further.

4. Use case

1
2
3
4
5
6
7
8
def fb_case(n) do
case {rem(n, 3), rem(n,5)} do
{0, 0} -> "FizzBuzz"
{0, _} -> "Fizz"
{_, 0} -> "Buzz"
_ -> n
end
end

This is a concise chunk of code but isn’t as readable as the one that uses cond.

5. Use pattern matching in functions

1
2
3
4
5
6
7
8
def fb_fun1(n) do
fb_fun(n, rem(n, 3), rem(n, 5))
end

def fb_fun(_, 0, 0), do: "FizzBuzz"
def fb_fun(_, 0, _), do: "Fizz"
def fb_fun(_, _, 0), do: "Buzz"
def fb_fun(n, _, _), do: n

I think this is actually less readable than the one that uses case as the logic.

6. Use guard clauses in functions

1
2
3
4
def fb_fun2(n) when rem(n, 3) == 0 and rem(n, 5) == 0, do: "FizzBuzz"
def fb_fun2(n) when rem(n, 3) == 0, do: "Fizz"
def fb_fun2(n) when rem(n, 5) == 0, do: "Buzz"
def fb_fun2(n), do: n

This feels like an improvement to the previous implementation readability wise.

7. Use custom made guard clauses in functions

1
2
3
4
5
6
7
defguard is_divisible_by_3(n) when rem(n, 3) == 0
defguard is_divisible_by_5(n) when rem(n, 5) == 0

def fb_fun3(n) when is_divisible_by_3(n) and is_divisible_by_5(n), do: "FizzBuzz"
def fb_fun3(n) when is_divisible_by_3(n), do: "Fizz"
def fb_fun3(n) when is_divisible_by_5(n), do: "Buzz"
def fb_fun3(n), do: n

I like this one a lot, it feels very terse and reads like a mathematical equation.

Which version do you personally find more readable? I feel like the last one and the implementation using cond are the more readable versions.

Here is the piece of code containing all implementations:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
defmodule Fizzbuzz do
def fb_if(n) do
if rem(n, 3) == 0 and rem(n, 5) == 0 do
"FizzBuzz"
else
if rem(n, 3) == 0 do
"Fizz"
else
if rem(n, 5) == 0 do
"Buzz"
else
n
end
end
end
end

def fb_cond1(n) do
cond do
rem(n, 3) == 0 and rem(n, 5) == 0 -> "FizzBuzz"
rem(n, 3) == 0 -> "Fizz"
rem(n, 5) == 0 -> "Buzz"
true -> n
end
end

def divisible_by_3?(n), do: rem(n, 3) == 0
def divisible_by_5?(n), do: rem(n, 5) == 0

def fb_cond2(n) do
cond do
divisible_by_3?(n) and divisible_by_5?(n) -> "FizzBuzz"
divisible_by_3?(n) -> "Fizz"
divisible_by_5?(n) -> "Buzz"
true -> n
end
end

def fb_case(n) do
case {rem(n, 3), rem(n, 5)} do
{0, 0} -> "FizzBuzz"
{0, _} -> "Fizz"
{_, 0} -> "Buzz"
_ -> n
end
end

def fb_fun1(n) do
fb_fun1(n, rem(n, 3), rem(n, 5))
end

def fb_fun1(_, 0, 0), do: "FizzBuzz"
def fb_fun1(_, 0, _), do: "Fizz"
def fb_fun1(_, _, 0), do: "Buzz"
def fb_fun1(n, _, _), do: n

def fb_fun2(n) when rem(n, 3) == 0 and rem(n, 5) == 0, do: "FizzBuzz"
def fb_fun2(n) when rem(n, 3) == 0, do: "Fizz"
def fb_fun2(n) when rem(n, 5) == 0, do: "Buzz"
def fb_fun2(n), do: n

defguard is_divisible_by_3(n) when rem(n, 3) == 0
defguard is_divisible_by_5(n) when rem(n, 5) == 0

def fb_fun3(n) when is_divisible_by_3(n) and is_divisible_by_5(n), do: "FizzBuzz"
def fb_fun3(n) when is_divisible_by_3(n), do: "Fizz"
def fb_fun3(n) when is_divisible_by_5(n), do: "Buzz"
def fb_fun3(n), do: n

end


ExUnit.start()

defmodule FizzbuzzTest do
use ExUnit.Case

@run_20_out [
1,
2,
"Fizz",
4,
"Buzz",
"Fizz",
7,
8,
"Fizz",
"Buzz",
11,
"Fizz",
13,
14,
"FizzBuzz",
16,
17,
"Fizz",
19,
"Buzz"
]

def assert_fizzbuzz(fun) do
assert 1..20 |> Enum.map(fn x -> apply(Fizzbuzz, fun, [x]) end) == @run_20_out
end

test "run" do
assert_fizzbuzz(:fb_if)
assert_fizzbuzz(:fb_cond1)
assert_fizzbuzz(:fb_cond2)
assert_fizzbuzz(:fb_case)
assert_fizzbuzz(:fb_fun1)
assert_fizzbuzz(:fb_fun2)
assert_fizzbuzz(:fb_fun3)
end
end

How to manage ginormous ExVCR cassettes

ExVCR is an awesome elixir package which helps you create repeatable integration tests. It works by saving the requests and responses you make to external services, in json files called cassettes. These cassettes are created the first time you run the tests. Here is an example from one of my projects:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
defmodule X.AuthControllerTest do
use X.ConnCase
use ExVCR.Mock, adapter: ExVCR.Adapter.Hackney

test "google redirect creates the user", %{conn: conn} do
conn =
use_cassette "google_oauth_challenge" do
conn
|> get(
"/auth/google/callback?code=....................."
)
end

assert ...
# ...

end
end

The first time I run this test it creates a file at fixture/vcr_cassettes/google_oauth_challenge.json. This contains the request and response data from this test. Now, whenever you rerun this test it just loads this request and response data and replays the response if the request matches the saved request. So, your tests will pass even when you don’t have network connectivity!

This is all nice as long as you have small responses. However, when you need to do this for responses whose size is in MBs you run into a few issues:

  1. Your tests become slower.
  2. You don’t want to haul around MBs of fixtures in your source control (you must check in the cassettes i.e. the fixture/ directory).

I recently ran into this situation with one of my projects where I was downloading a gzipped csv file which was ~50MB. The way I fixed it is:

  1. Let ExVCR record the cassettes with all the large data.
  2. Download the same response from outside using curl.
  3. Take a tiny sample of the response data
  4. Replace the response.body in the cassette with my sample response data

This fixes the slow tests and also reduces the size of cassettes.

Another thing to remember is if your response is binary and not a valid string, ExVCR stores it differently. Its json serializer runs binary data through the following functions

1
2
3
response.body
|> :erlang.term_to_binary()
|> Base.encode64()

So, if you have gzipped (or any other binary) data, load it into IEx from step 2 above and run it through the same functions as above and use this data to replace the response.body. You can also skip 2 and use the data from your large cassette before trimming it down by running it through:

1
2
3
4
5
cassette = File.read!("fixture/vcr_cassettes/cassette.json") |> Poison.decode! |> hd
cassette["response"]["body"]
|> Base.decode64!
|> :erlang.binary_to_term # you'll have the raw response binary data at this point
|> :zlib.gunzip # if your response is gzipped you'll need this to extract the actual data

P.S My editor froze while trying to edit such a large cassette, so I used jq to remove the large body keys from the response using the following command:

1
jq 'walk(if type == "object" and has("body") then del(.body) else . end)' mycassette.json  > cassette_without_body.json

To get the above working you need jq > 1.5 and the following in your ~/.jq

1
2
3
4
5
6
7
8
9
# Apply f to composite entities recursively, and to atoms
def walk(f):
. as $in
| if type == "object" then
reduce keys_unsorted[] as $key
( {}; . + { ($key): ($in[$key] | walk(f)) } ) | f
elif type == "array" then map( walk(f) ) | f
else f
end;

Your Elixir bucket for debugging

While debugging and experimenting with apis in javascript, I usually set a variable on window and then play with it in the console.

1
2
3
4
5
6

// ....

let channel = socket.channel( //...

window.channel = channel;

Once the above code is set, I can just jump into the console and start pushing messages to the channel and discover how it all works by poking it around.

This led me to think about how this can be done in Elixir. And, in one of my recent projects, I was spawning a few processes and wanted to get their pids to play around by sending messages to them. And I just thought why not use ets as global bucket to shove stuff into. Here is my solution:

Just create an ets table in your .iex.exs

1
2
3
# .iex.exs
# A tmp bucket which you can push stuff into and analyze from iex
:ets.new(:tmp, [:named_table, :public])

And, wherever you want, just push things into this table:

1
2
3
4
# in my app
def websocket_info(_msg, _conn, state) do
:ets.insert(:tmp, {self(), :ok})
# ...

Once you have that, you can start poking and prodding your processes/variables

1
2
# iex>
:ets.tab2list(:tmp)

Hope this helps you while debugging your Elixir apps :D

How to implement your own :timer.sleep in Elixir

Elixir has :timer.sleep(ms), a function which allows you to pause a process’s execution for an arbitrary number of milliseconds.

We can implement the same using messages and a timeout in the following ways.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
defmodule SleepTimeout do
def sleep(milliseconds) do
# use a receive block with no matching blocks and an after block
# which times out after the input number of milliseconds
receive do
after
milliseconds ->
:ok
end
end
end

defmodule SleepMsg do
def sleep(milliseconds) do
# create a unique ref, so that we don't stop on any random `:timeout` message.
ref = make_ref()
# schedule a message to be sent to ourselves in the input number of milliseconds
Process.send_after(self(), {:timeout, ref}, milliseconds)

receive do
# wait for our message to arrive
{:timeout, ^ref} ->
:ok
end
end
end

{microseconds, _} = :timer.tc(fn -> SleepTimeout.sleep(1000) end)
IO.puts("slept for #{round(microseconds / 1000)} microseconds")
# => slept for 1000 microseconds


{microseconds, _} = :timer.tc(fn -> SleepMsg.sleep(1000) end)
IO.puts("slept for #{round(microseconds / 1000)} microseconds")
# => slept for 1001 microseconds

You may ask, what is the frigging point of all of this when we have :timer.sleep? Well, there is no point. This was just a fun little exercise :D

How to setup your hexo blog to be automatically published using Travis CI

GitHub Pages has recently finished one of the long standing feature requests of allowing SSL on custom domains!. I have it enabled on my blog (https://minhajuddin.com). Yay! However, I have not been able to publish any new blog posts because the effort required to publish a new post is a bit too much! The previous service that I had used to auto publish was shut down. And looking at the alternatives, Travis CI looked great. I use it for a few of my other projects.

Here is the .travis.yml with a few slight modifications from: https://github.com/jkeylu/deploy-hexo-site-by-travis-ci/blob/master/_travis.yml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Deploy hexo site by travis-ci
# https://github.com/jkeylu/deploy-hexo-site-by-travis-ci
# 1. Copy this file to the root of your repository, then rename it to '.travis.yml'
# 2. Replace 'YOUR NAME' and 'YOUR EMAIL' at line 29
# 3. Add an Environment Variable 'DEPLOY_REPO'
# 1. Generate github access token on https://github.com/settings/applications#personal-access-tokens
# 2. Add an Environment Variable on https://travis-ci.org/{github username}/{repository name}/settings/env_vars
# Variable Name: DEPLOY_REPO
# Variable Value: https://{githb access token}@github.com/{github username}/{repository name}.git
# Example: DEPLOY_REPO=https://6b75cfe9836f56e6d21187622730889874476c23@github.com/jkeylu/test-hexo-on-travis-ci.git
# 4. Make sure Travis is configured to hide your Variable, else others will see your access token and can mess with all your repos.

language: node_js
node_js:
- "9"

branches:
only:
- master

install:
- npm install

before_script:
- git config --global user.name 'Khaja Minhajuddin'
- git config --global user.email 'minhajuddin.k@gmail.com'

script:
- ./node_modules/.bin/hexo generate

after_success:
- mkdir .deploy
- cd .deploy
- git clone --depth 1 --branch gh-pages --single-branch $DEPLOY_REPO . || (git init && git remote add -t gh-pages origin $DEPLOY_REPO)
- rm -rf ./* # Clear old verion
- cp -r ../public/* . # Copy over files for new version
- git add -A .
- git commit -m 'Site updated' # Make a new commit for new version
- git branch -m gh-pages
- git push -q -u origin gh-pages # Push silently so we don't leak information

ets versus redis benchmarks for a simple key value store

The project that I am currently working on has a huge data set of static lookup data. And, we have been using Redis to store this data since the beginning of the project. We figured, redis would be the fastest as the whole data is in memory. However, in our production use we have found redis to be the bottleneck.

This is not really redis’ fault as the data access pattern that we have involves a huge number of lookups more than 10K lookups per request. Also, since redis runs on a single core, it isn’t able to use all the cores on our server. Add the network costs and the serialization costs to it and things add up very quickly.

This led me to do some benchmarking of redis against ets with our actual production data and (un)surprisingly we found that ets beats Redis for simple key value data. So, if you are using redis as a key value store. Please do yourself a favor and use ets (If you are using Elixir or erlang).

I created a simple mix project which benchmarks ets and redis (https://github.com/minhajuddin/redis_vs_ets_showdown)

Go ahead and try it out by tweaking the count of records or the parallelism.

We found that the ets to redis performance gap actually grows as the parallelism increases.

Checkout the repository for the benchmark data: https://github.com/minhajuddin/redis_vs_ets_showdown

You can also check the reports at:

  1. https://minhajuddin.github.io/redis_vs_ets_showdown/reports/benchmark-1000.html
  2. https://minhajuddin.github.io/redis_vs_ets_showdown/reports/benchmark-1000000.html

Here is the gist of the benchmark:

Quick explanation of names

ets_get_1000: does an ets lookup 1000 times

redis_get_1000: does a redis lookup 1000 times using HGET

ets_get_multi: does an ets lookup 1000 times

redis_get_multi: does a single HMGET Redis lookup

Benchmark for 1000_000 records

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
Operating System: Linux
CPU Information: Intel(R) Core(TM) i7-4700MQ CPU @ 2.40GHz
Number of Available Cores: 8
Available memory: 12.019272 GB
Elixir 1.5.1
Erlang 20.1
Benchmark suite executing with the following configuration:
warmup: 2.00 s
time: 30.00 s
parallel: 4
inputs: none specified
Estimated total run time: 2.13 min


Name ips average deviation median
ets_get_multi 3.31 K 0.30 ms ±20.60% 0.28 ms
ets_get_1000 2.87 K 0.35 ms ±75.38% 0.31 ms
redis_get_multi 0.34 K 2.95 ms ±17.46% 3.01 ms
redis_get_1000 0.0122 K 82.15 ms ±15.77% 77.68 ms

Comparison:
ets_get_multi 3.31 K
ets_get_1000 2.87 K - 1.15x slower
redis_get_multi 0.34 K - 9.76x slower
redis_get_1000 0.0122 K - 271.91x slower

Benchmark for 1000 records

1
2
3
4
5
6
7
8
9
10
11
Name                      ips        average  deviation         median
ets_get_multi 4.06 K 0.25 ms ±12.31% 0.24 ms
ets_get_1000 3.96 K 0.25 ms ±18.72% 0.23 ms
redis_get_multi 0.34 K 2.90 ms ±12.34% 2.99 ms
redis_get_1000 0.0115 K 87.27 ms ±17.31% 81.36 ms

Comparison:
ets_get_multi 4.06 K
ets_get_1000 3.96 K - 1.02x slower
redis_get_multi 0.34 K - 11.78x slower
redis_get_1000 0.0115 K - 354.04x slower

Performance variations of string substitution in Elixir

I had to do some string stripping in one of my apps which was a bit performance sensitive. I ended up benching multiple approaches to see the speed differences. The results are not that suprising.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

path = "/haha/index.html"
subdomain_rx = ~r(^\/[^\/]+)

Benchee.run(%{
"pattern_match_bytes" => fn ->
len = byte_size("/haha")
<<_::bytes-size(len), rest :: binary >> = path
rest
end,
"pattern_match" => fn -> "/haha" <> rest = path; rest end,
"slice" => fn -> String.slice(path, String.length("/haha")..-1) end,
"replace_prefix" => fn -> String.replace_prefix(path, "/haha", "") end,
"split" => fn -> String.splitter(path, "/") |> Enum.drop(1) |> Enum.join("/") end,
"regex" => fn -> String.replace(path, subdomain_rx, "") end,
})

output of benchee

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
bench [master] $ mix run lib/bench.exs
Operating System: Linux
CPU Information: Intel(R) Core(TM) i7-4700MQ CPU @ 2.40GHz
Number of Available Cores: 8
Available memory: 12.019316 GB
Elixir 1.4.4
Erlang 20.0-rc2
Benchmark suite executing with the following configuration:
warmup: 2.00 s
time: 5.00 s
parallel: 1
inputs: none specified
Estimated total run time: 42.00 s


Benchmarking pattern_match...
Warning: The function you are trying to benchmark is super fast, making measures more unreliable! See: https://github.com/PragTob/benchee/wiki/Benchee-Warnings#fast-execution-warning

You may disable this warning by passing print: [fast_warning: false] as configuration options.

Benchmarking pattern_match_bytes...
Warning: The function you are trying to benchmark is super fast, making measures more unreliable! See: https://github.com/PragTob/benchee/wiki/Benchee-Warnings#fast-execution-warning

You may disable this warning by passing print: [fast_warning: false] as configuration options.

Benchmarking regex...
Benchmarking replace_prefix...
Warning: The function you are trying to benchmark is super fast, making measures more unreliable! See: https://github.com/PragTob/benchee/wiki/Benchee-Warnings#fast-execution-warning

You may disable this warning by passing print: [fast_warning: false] as configuration options.

Benchmarking slice...
Benchmarking split...

Name ips average deviation median
pattern_match_bytes 24.05 M 0.0416 μs ±1797.73% 0.0300 μs
pattern_match 22.37 M 0.0447 μs ±1546.59% 0.0400 μs
replace_prefix 3.11 M 0.32 μs ±204.05% 0.22 μs
slice 1.25 M 0.80 μs ±6484.21% 1.00 μs
split 0.75 M 1.34 μs ±3267.35% 1.00 μs
regex 0.42 M 2.37 μs ±1512.77% 2.00 μs

Comparison:
pattern_match_bytes 24.05 M
pattern_match 22.37 M - 1.08x slower
replace_prefix 3.11 M - 7.73x slower
slice 1.25 M - 19.30x slower
split 0.75 M - 32.18x slower
regex 0.42 M - 57.00x slower

So, the next time you want to strip prefixing stuff, use pattern matching :)

Update

Based on the comments by @Matt Widmann and @Peter I did a quick test of replacing the tail of the string using the following code:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
path = "/haha/index.html"

ext_rx = ~r/\.[^\.]+$/

Benchee.run(%{
"reverse_pattern_match_bytes" => fn ->
len = byte_size(".html")
<<_::bytes-size(len), rest :: binary >> = String.reverse(path)
rest |> String.reverse
end,
"reverse_pattern_match" => fn -> "lmth." <> rest = String.reverse(path); String.reverse(rest) end,
"slice" => fn -> String.slice(path, 0..(String.length(".html") * (-1))) end,
"replace_suffix" => fn -> String.replace_suffix(path, ".html", "") end,
"split" => fn -> String.splitter(path, ".") |> Enum.slice(0..-2) |> Enum.join(".") end,
"regex" => fn -> String.replace(path, ext_rx, "") end,
})

The results for this are:

1
2


This led be to look at the actual Elixir code for replace_prefix and replace_suffix which is:

https://github.com/elixir-lang/elixir/blob/master/lib/elixir/lib/string.ex#L752

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26

def replace_prefix(string, match, replacement)
when is_binary(string) and is_binary(match) and is_binary(replacement) do
prefix_size = byte_size(match)
suffix_size = byte_size(string) - prefix_size

case string do
<<prefix::size(prefix_size)-binary, suffix::size(suffix_size)-binary>> when prefix == match ->
replacement <> suffix
_ ->
string
end
end

def replace_suffix(string, match, replacement)
when is_binary(string) and is_binary(match) and is_binary(replacement) do
suffix_size = byte_size(match)
prefix_size = byte_size(string) - suffix_size

case string do
<<prefix::size(prefix_size)-binary, suffix::size(suffix_size)-binary>> when suffix == match ->
prefix <> replacement
_ ->
string
end
end

I tweaked the benchmark code a little to run each replace a 1000 times to remove the “too fast” warning.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46

defmodule Bench do
def run(fun), do: fun.()
def no_run(_fun), do: :ok
def times(n \\ 1000, fun), do: fn -> Enum.each(1..n, fn _ -> fun.() end) end
end

# match beginning of string
Bench.run(fn ->
path = "/haha/index.html"

subdomain_rx = ~r(^\/[^\/]+)

Benchee.run(%{
"pattern_match_bytes" => Bench.times(fn ->
len = byte_size("/haha")
<<_::bytes-size(len), rest :: binary >> = path
rest
end),
"pattern_match" => Bench.times(fn -> "/haha" <> rest = path; rest end),
"slice" => Bench.times(fn -> String.slice(path, String.length("/haha")..-1) end),
"replace_prefix" => Bench.times(fn -> String.replace_prefix(path, "/haha", "") end),
"split" => Bench.times(fn -> String.splitter(path, "/") |> Enum.drop(1) |> Enum.join("/") end),
"regex" => Bench.times(fn -> String.replace(path, subdomain_rx, "") end),
})
end)

# match end of string string
Bench.run(fn ->
path = "/haha/index.html"

ext_rx = ~r/\.[^\.]+$/

Benchee.run(%{
"reverse_pattern_match_bytes" => Bench.times(fn ->
len = byte_size(".html")
<<_::bytes-size(len), rest :: binary >> = String.reverse(path)
rest |> String.reverse
end),
"reverse_pattern_match" => Bench.times(fn -> "lmth." <> rest = String.reverse(path); String.reverse(rest) end),
"slice" => Bench.times(fn -> String.slice(path, 0..(String.length(".html") * (-1))) end),
"replace_suffix" => Bench.times(fn -> String.replace_suffix(path, ".html", "") end),
"split" => Bench.times(fn -> String.splitter(path, ".") |> Enum.slice(0..-2) |> Enum.join(".") end),
"regex" => Bench.times(fn -> String.replace(path, ext_rx, "") end),
})
end)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
elixir_benchmarks [master *] $ mix run lib/bench.exs

Operating System: Linux
CPU Information: Intel(R) Core(TM) i7-4700MQ CPU @ 2.40GHz
Number of Available Cores: 8
Available memory: 12.019316 GB
Elixir 1.4.4
Erlang 20.0-rc2
Benchmark suite executing with the following configuration:
warmup: 2.00 s
time: 5.00 s
parallel: 1
inputs: none specified
Estimated total run time: 42.00 s


Benchmarking pattern_match...
Benchmarking pattern_match_bytes...
Benchmarking regex...
Benchmarking replace_prefix...
Benchmarking slice...
Benchmarking split...

Name ips average deviation median
pattern_match_bytes 15.17 K 0.0659 ms ±18.05% 0.0610 ms
pattern_match 14.60 K 0.0685 ms ±17.41% 0.0640 ms
replace_prefix 2.52 K 0.40 ms ±21.46% 0.38 ms
slice 0.83 K 1.20 ms ±21.95% 1.11 ms
split 0.58 K 1.72 ms ±16.76% 1.63 ms
regex 0.45 K 2.24 ms ±7.42% 2.22 ms

Comparison:
pattern_match_bytes 15.17 K
pattern_match 14.60 K - 1.04x slower
replace_prefix 2.52 K - 6.01x slower
slice 0.83 K - 18.24x slower
split 0.58 K - 26.10x slower
regex 0.45 K - 33.98x slower
Operating System: Linux
CPU Information: Intel(R) Core(TM) i7-4700MQ CPU @ 2.40GHz
Number of Available Cores: 8
Available memory: 12.019316 GB
Elixir 1.4.4
Erlang 20.0-rc2
Benchmark suite executing with the following configuration:
warmup: 2.00 s
time: 5.00 s
parallel: 1
inputs: none specified
Estimated total run time: 42.00 s


Benchmarking regex...
Benchmarking replace_suffix...
Benchmarking reverse_pattern_match...
Benchmarking reverse_pattern_match_bytes...
Benchmarking slice...
Benchmarking split...

Name ips average deviation median
replace_suffix 2633.75 0.38 ms ±21.15% 0.36 ms
split 618.06 1.62 ms ±13.56% 1.57 ms
regex 389.25 2.57 ms ±6.54% 2.54 ms
slice 324.19 3.08 ms ±19.06% 2.88 ms
reverse_pattern_match_bytes 275.45 3.63 ms ±12.08% 3.48 ms
reverse_pattern_match 272.06 3.68 ms ±11.99% 3.54 ms

Comparison:
replace_suffix 2633.75
split 618.06 - 4.26x slower
regex 389.25 - 6.77x slower
slice 324.19 - 8.12x slower
reverse_pattern_match_bytes 275.45 - 9.56x slower
reverse_pattern_match 272.06 - 9.68x slower
elixir_benchmarks [master *] $

For reverse string removal from the end, replace_suffix is the fastest which makes sense. However, for removing the prefix, pattern_match_bytes seems to be the fastest. But, it isn’t really truly correct. Because in my instance, I know for sure that the prefix is present. So, the second best performance for which is pattern_match is 6x better than the current String.replace_prefix implementation.

It may be because I am using OTP 20? I’ll run this on other versions of OTP to compare results. And, if the results are cosistent, will create PR on elixir to change the default implementation.

Optimal order for adding lists in Elixir

Lists are the bread and butter of a functional language and Elixir is no different.

Elixir uses linked lists to represent lists. Which means, if a list is n elements long it will take n dereferences to get to the last element of the list. This understanding is very important for writing efficient code in Elixir. Because of this adding to the head of a list is nearly instantaneous.

Adding to the beginning of a list

Let us take the following list as an example:

el1 -> el2 -> el3 -> el4

It has 4 elements and el1 is the head of the list. To add a new element el0 to the beginning of the list, All you need to do is create a node to store el0 and set its next pointer to el1. This changes the representation to:

el0 -> el1 -> el2 -> el3 -> el4

Now, one thing to note is: if a previous variable has a reference to el1 it will still have a reference to the earlier 4 element list. So, we are not mutating/chaning the existing list/references.

Adding to the end of a list

However, adding something to the end is not the same. Let us take the previous example:

el1 -> el2 -> el3 -> el4

Now, if this list is referenced by a binding foo. And if we want to create a new list bar with a new element el5 at the end. We can’t just traverse the list, create a new node with value el5 and set a reference from el4 to el5. If we did that, the reference foo would also get a new element at the end. And this is not how Elixir/Erlang work. The BEAM does not allow mutation to existing data. So, to work within this framework, we have to create a brand new list containing a copy of all elements el1..el4 and a new node el5. That is why adding elements to the tail of a linked list is slow in Elixir. Because we end up copying the list and appending a new element.

Now, with this understanding. Let us think of the most efficient way of combining two lists when the order of elements doesn’t matter. For instance, when you send http requests using httpoison the order of the headers doesn’t matter. So, when you have the following implementations available:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# ...

# A: First list is small most of the time
@default_headers [{"content-type", "application/json"}, {"authorization", "Bearer Foo"}, {"accept", "application/json"}]
def get(url, headers \\ []) do
headers ++ @default_headers
end

# B: Second list is small most of the time
@default_headers [{"content-type", "application/json"}, {"authorization", "Bearer Foo"}, {"accept", "application/json"}]
def get(url, headers \\ []) do
@default_headers ++ headers
end

# ...

Pick the one where the first list has lesser elements. In this example that would be the A implementation.

I did a quick benchmark just for kicks (Full code available at https://github.com/minhajuddin/bench_list_cat):

Benchmark

Elixir 1.4.4
Erlang 20.0-rc2
Benchmark suite executing with the following configuration:
warmup: 2.00 s
time: 5.00 s
parallel: 1
inputs: none specified
Estimated total run time: 14.00 s


Benchmarking big_list_first...
Benchmarking small_list_first...

Name                       ips        average  deviation         median
small_list_first        6.49 K       0.154 ms   ±371.63%      0.0560 ms
big_list_first       0.00313 K      319.87 ms    ±37.78%      326.10 ms

Comparison:
small_list_first        6.49 K
big_list_first       0.00313 K - 2077.49x slower

Code used for benchmarking

small_list = Enum.to_list(1..10_000)
big_list = Enum.to_list(1..10_000_000)

Benchee.run(%{
  "small_list_first" => fn -> small_list ++ big_list end,
  "big_list_first" => fn -> big_list ++ small_list end
})

Note that this is an outrageous benchmark, no one is adding lists containing 10 million elements this way ;). But it demonstrates my point.

How to pass a multi line copy sql to psql

I have been working with a lot of ETL stuff lately and have to import/export data from our postgresql database frequently.

While writing a script recently, I found that psql doesn’t allow using the \COPY directive with a multi line SQL when it is passed to the psql command. The only workaround seemed to be squishing the sql into a single line. However, this makes it very difficult to read and modify the SQL. This is when bash came to my rescue :)

Here is a hacky way to do use multiline SQL with \COPY.

This just strips the newlines before sending it to psql. Have your cake and eat it too :)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15

# Using a file
psql -f <(tr -d '\n' < ~/s/test.sql )
# or
psql < <(tr -d '\n' < ~/s/test.sql )

# Putting the SQL using a HEREDOC
cat <<SQL | tr -d '\n' | \psql mydatabase
\COPY (
SELECT
provider_id,
provider_name,
...
) TO './out.tsv' WITH( DELIMITER E'\t', NULL '', )
SQL

How to debug/view phoenix channel network traffic in Google Chrome

When you open Google Chrome’s Network tab, you don’t see the traffic for websockets. I spent quite some time trying to see the messages between my browser and the phoenix server. I was expecting to see a lot of rows in the network tab one for every message.

However, since websockets don’t follow a request-response pattern. They are shown in a different tab. To see the messages sent on the websocket. Click on your Network tab and then click on the websocket request. This should show you a pane with a Frames tab. This Frames tab should show you all the messages that are being sent back and forth on the websocket.

Here is a screenshot:

3 things that are needed to make a successful product

In my 10 years in the software industry. I have create a number of products and have worked on a lot of projects. Looking back at the products/projects that have been successful, one this stands out. There are 3 critical pieces for a software product / startup.

0. A value proposition

This is a dead giveaway. So, I haven’t even counted this. Without a value proposition you don’t have anything. Your product must provide value to your customers.

1. Domain knowledge

You need to have someone on your team with the knowledge of the domain. Ideally you would have come up with a product idea because of a good understanding of the pain points. And of the things that can provide value. This is also fairly easy to understand.

2. Marketing Strength

You also need to have someone who can market your product. An awesome product without marketing is a dead product. You need to either build your marketing expertise or get someone who is good at it.

Marketing is one of the things that is often overlooked. People think that if the product is good people will buy it. This is completely false. You need a lot of hustle to market your product.

3. Technical expertise

You obviously need someone who can build a usable product which provides value. But this is the last on the list.

Many people come to me with ideas for startups. I always tell them about these 3 things. The next time you want to build a startup, think about these skills. Without one of these you are dead in the water.

Getting started with Elm

I had a good time presenting a talk about “Getting Started with Elm” at the awesome nghyderabad The audience was very interactive and the food was great. Shout out to Fission Labs for the awesome venue!

Here are a few useful links which should help you learn Elm

  1. elm-lang home page: http://elm-lang.org/
  2. docs: http://elm-lang.org/docs
  3. examples: http://elm-lang.org/examples
  4. A handy online book: https://guide.elm-lang.org/
  5. Online editor: http://elm-lang.org/try
  6. Elm packages website: http://package.elm-lang.org/

Code for the app that we built: https://github.com/minhajuddin/getting-started-with-elm/blob/master/Counter.elm

How to fix Ecto duplicate name migrations error

If you run into the following error while running your Ecto migrations

ReleaseTasks.migrate
** (Ecto.MigrationError) migrations can't be executed, migration name foo_bar is duplicated
   (ecto) lib/ecto/migrator.ex:259: Ecto.Migrator.ensure_no_duplication/1
   (ecto) lib/ecto/migrator.ex:235: Ecto.Migrator.migrate/4

You can fix it by running 1 migration at a time

mix ecto.migrate --step 1

This happens when you are trying to run two migrations with the same name (regardless of the timestamps). By restricting it to run 1 migration at a time you won’t run into this issue.

Ideally you should not have 2 migrations with the same name :)

Lightweight xml utility to pluck elements

jq is an awesome utility for parsing and transforming json via the command line. I wanted something similar for xml. The following is a short ruby script which does a tiny tiny (did I say tiny?) bit of what jq does for xml. Hope you find it useful.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#!/usr/bin/env ruby
# Author: Khaja Minhajuddin

require 'nokogiri'
require 'parallel'
require 'etc'

if ARGV.count < 2
puts <<-EOS
Usage:
xml_pluck xpath file1.xml file2.xml

e.g.
xml_pluck "//children/name/text()" <(echo '<?xml version="1.0"?><children><name>Zainab</name><name>Mujju</name></children>')
# prints
Zainab
Mujju
EOS
exit -1
end

xpath = ARGV.shift
Parallel.each(ARGV, in_processes: Etc.nprocessors) do |file|
doc = Nokogiri::XML(File.read(file))
puts doc.xpath(xpath)
end

Key transformation after decoding json in Elixir

In a previous blog post we saw how to do case insensitive retrieval from maps. A better solution for this if there are many key lookups is to transform the input by lower casing all the keys just after decoding. The solution from the blog post would iterate over each {key, value} pair till it found the desired key. However a proper map lookup doesn’t iterate over the keys but uses a hashing algorithm to get to the key’s location in constant time regardless of the size of the map.

Anyway, Here is the solution to transform each key for input JSON. Hope you find it useful :)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
defmodule KeyTransformer do
def lower_case_keys(input) do
transform_keys(input, &String.downcase/1)
end

def transform_keys(input, tx_key_fun) when is_list(input) do
Enum.map(input, fn el -> transform_keys(el, tx_key_fun) end)
end

def transform_keys(input, tx_key_fun) when is_map(input) do
Enum.reduce(input, %{}, fn {k, v}, acc ->
Map.put(acc, tx_key_fun.(k), transform_keys(v, tx_key_fun))
end)
end

def transform_keys(value, _tx_key_fun), do: value
end

ExUnit.start

defmodule KeyTransformerTest do
use ExUnit.Case
import KeyTransformer

test "simple map" do
assert lower_case_keys(%{"NAME" => "Khaja"}) == %{"name" => "Khaja"}
assert lower_case_keys(%{"NAME" => "Khaja", "Age" => 3}) == %{"name" => "Khaja", "age" => 3}
end

test "nested map" do
assert lower_case_keys(%{"Mujju" => %{"NAME" => "Khaja"}}) == %{"mujju" => %{"name" => "Khaja"}}
end

test "deeply nested map" do
assert lower_case_keys(%{"Children" => %{"Mujju" => %{"NAME" => "Khaja"}}}) == %{"children" => %{"mujju" => %{"name" => "Khaja"}}}
end

test "list of maps" do
assert lower_case_keys([%{"NAME" => "Zainu"}]) == [%{"name" => "Zainu"}]

assert lower_case_keys([%{
"NAME" => "Khaja Muzaffaruddin",
"agE" => 2,
}, %{}]) == [%{"age" => 2, "name" => "Khaja Muzaffaruddin"}, %{}]
end

test "nested list of maps" do
assert lower_case_keys(%{
"JUlian" => [%{"Movie" => "Madagascar"}]
}) == %{"julian" => [%{"movie" => "Madagascar"}]}
end

test "deeply nested list of maps" do
assert lower_case_keys(%{"MovieGenres" => [%{
"JUlian" => [%{"Movie" => "Madagascar"}]
}, %{"Ho" => 33}], "OK then" => "little story"}) == %{
"moviegenres" => [%{"julian" => [%{"movie" => "Madagascar"}]}, %{"ho" => 33}], "ok then" => "little story"
}
end
end