From 2ee58e2c06ef02680ee7f9f12d34fbd98519b331 Mon Sep 17 00:00:00 2001 From: kballou Date: Fri, 10 Feb 2017 13:32:20 -0700 Subject: Migrate blog content to new repository --- LICENSE | 674 +++++++++++++++++++ Makefile | 3 - README.markdown | 22 +- blag/Makefile | 14 - blag/config.yaml | 14 - blag/content/about-me.markdown | 31 - blag/content/blog/Spark.markdown | 738 --------------------- blag/content/blog/Storm-vs-Spark.markdown | 461 ------------- blag/content/blog/Storm.markdown | 593 ----------------- blag/content/blog/coreboot-x230.markdown | 498 -------------- blag/content/blog/distributed_systems.markdown | 188 ------ blag/content/blog/elixir_hot_swap_code.markdown | 598 ----------------- blag/content/blog/elixir_otp_releases.markdown | 324 --------- blag/content/blog/git-in-reverse.markdown | 684 ------------------- .../content/blog/git-resurrecting-history.markdown | 525 --------------- blag/content/blog/hunk_editing.markdown | 282 -------- blag/content/blog/vim_tips_2015_03_17.markdown | 85 --- blag/content/blog/vim_tips_2015_05_07.markdown | 141 ---- blag/layouts/404.html | 7 - blag/layouts/_default/list.html | 0 blag/layouts/_default/single.html | 11 - blag/layouts/blog/li.html | 0 blag/layouts/blog/single.html | 17 - blag/layouts/blog/summary.html | 20 - blag/layouts/index.html | 9 - blag/layouts/partials/footer.html | 14 - blag/layouts/partials/head_includes.html | 3 - blag/layouts/partials/header.html | 20 - blag/layouts/partials/meta.html | 2 - blag/layouts/partials/subheader.html | 43 -- blag/layouts/rss.xml | 21 - blag/layouts/shortcodes/video.html | 6 - blag/layouts/single.html | 5 - blag/layouts/sitemap.xml | 16 - blag/static/css/site.css | 130 ---- blag/static/favicon.ico | Bin 1150 -> 0 bytes blag/static/media/SentimentAnalysisTopology.png | Bin 12248 -> 0 bytes blag/static/media/code-branching.png | Bin 11121 -> 0 bytes blag/static/media/coreboot-x230-1.png | Bin 888438 -> 0 bytes blag/static/media/coreboot-x230-2.png | Bin 912897 -> 0 bytes blag/static/media/coreboot-x230-3.png | Bin 1070183 -> 0 bytes blag/static/media/git-branching-1.png | Bin 18663 -> 0 bytes blag/static/media/git-commit-1.png | Bin 22522 -> 0 bytes blag/static/media/git-ff-merge-1.png | Bin 13500 -> 0 bytes blag/static/media/git-ff-merge-2.png | Bin 16779 -> 0 bytes blag/static/media/git-repo-state-1.svg | 162 ----- blag/static/media/git-repo-state-2.svg | 221 ------ blag/static/media/git-repo-state-3.svg | 220 ------ blag/static/media/git-repo-state-4.svg | 232 ------- blag/static/media/git-repo-state-5.svg | 249 ------- blag/static/media/git-resolve-merge.png | Bin 14566 -> 0 bytes blag/static/media/git-tree-1.png | Bin 22500 -> 0 bytes blag/static/media/git-tree-2.png | Bin 15028 -> 0 bytes blag/static/media/spark_issues_chart.png | Bin 10475 -> 0 bytes blag/static/media/storm_issues_chart.png | Bin 9893 -> 0 bytes blag/static/media/videos/comment.ogg | Bin 280358 -> 0 bytes blag/static/media/videos/cw.ogg | Bin 221082 -> 0 bytes blag/static/media/videos/indent.ogg | Bin 290204 -> 0 bytes blag/static/media/videos/macros.ogg | Bin 691466 -> 0 bytes blag/static/robots.txt | 11 - 60 files changed, 679 insertions(+), 6615 deletions(-) create mode 100644 LICENSE delete mode 100644 blag/Makefile delete mode 100644 blag/config.yaml delete mode 100644 blag/content/about-me.markdown delete mode 100644 blag/content/blog/Spark.markdown delete mode 100644 blag/content/blog/Storm-vs-Spark.markdown delete mode 100644 blag/content/blog/Storm.markdown delete mode 100644 
blag/content/blog/coreboot-x230.markdown delete mode 100644 blag/content/blog/distributed_systems.markdown delete mode 100644 blag/content/blog/elixir_hot_swap_code.markdown delete mode 100644 blag/content/blog/elixir_otp_releases.markdown delete mode 100644 blag/content/blog/git-in-reverse.markdown delete mode 100644 blag/content/blog/git-resurrecting-history.markdown delete mode 100644 blag/content/blog/hunk_editing.markdown delete mode 100644 blag/content/blog/vim_tips_2015_03_17.markdown delete mode 100644 blag/content/blog/vim_tips_2015_05_07.markdown delete mode 100644 blag/layouts/404.html delete mode 100644 blag/layouts/_default/list.html delete mode 100644 blag/layouts/_default/single.html delete mode 100644 blag/layouts/blog/li.html delete mode 100644 blag/layouts/blog/single.html delete mode 100644 blag/layouts/blog/summary.html delete mode 100644 blag/layouts/index.html delete mode 100644 blag/layouts/partials/footer.html delete mode 100644 blag/layouts/partials/head_includes.html delete mode 100644 blag/layouts/partials/header.html delete mode 100644 blag/layouts/partials/meta.html delete mode 100644 blag/layouts/partials/subheader.html delete mode 100644 blag/layouts/rss.xml delete mode 100644 blag/layouts/shortcodes/video.html delete mode 100644 blag/layouts/single.html delete mode 100644 blag/layouts/sitemap.xml delete mode 100644 blag/static/css/site.css delete mode 100644 blag/static/favicon.ico delete mode 100644 blag/static/media/SentimentAnalysisTopology.png delete mode 100644 blag/static/media/code-branching.png delete mode 100644 blag/static/media/coreboot-x230-1.png delete mode 100644 blag/static/media/coreboot-x230-2.png delete mode 100644 blag/static/media/coreboot-x230-3.png delete mode 100644 blag/static/media/git-branching-1.png delete mode 100644 blag/static/media/git-commit-1.png delete mode 100644 blag/static/media/git-ff-merge-1.png delete mode 100644 blag/static/media/git-ff-merge-2.png delete mode 100644 blag/static/media/git-repo-state-1.svg delete mode 100644 blag/static/media/git-repo-state-2.svg delete mode 100644 blag/static/media/git-repo-state-3.svg delete mode 100644 blag/static/media/git-repo-state-4.svg delete mode 100644 blag/static/media/git-repo-state-5.svg delete mode 100644 blag/static/media/git-resolve-merge.png delete mode 100644 blag/static/media/git-tree-1.png delete mode 100644 blag/static/media/git-tree-2.png delete mode 100644 blag/static/media/spark_issues_chart.png delete mode 100644 blag/static/media/storm_issues_chart.png delete mode 100644 blag/static/media/videos/comment.ogg delete mode 100644 blag/static/media/videos/cw.ogg delete mode 100644 blag/static/media/videos/indent.ogg delete mode 100644 blag/static/media/videos/macros.ogg delete mode 100644 blag/static/robots.txt diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..94a9ed0 --- /dev/null +++ b/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. 
By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. 
+ + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. 
+ + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. 
This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. 
Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. 
+Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). 
+ + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". 
+ + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. 
+ + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. 
+ + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. 
diff --git a/Makefile b/Makefile index cb9bc4a..0a2d7c4 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,3 @@ build: container blog container: @docker build -t ${TAG} . - -blog: - @$(MAKE) -C blag diff --git a/README.markdown b/README.markdown index 0ea34c8..3b5bd41 100644 --- a/README.markdown +++ b/README.markdown @@ -1,23 +1,11 @@ # kennyballou.com # -Website and blog of Kenny Ballou powered by [Hugo][1]. +Website and configuration used for kennyballou.com. ## License ## -The following files and directories including their contents are Copyright -Kenny Ballou, or their respective copyright holders: +All directories and file are made available as Free Software under the GNU +[General Public License][1] (version 3 or later), unless clearly designated +otherwise. For more information, see the accompanying `LICENSE`. -* blag/content - -* blag/static/favicon.ico - -* blag/static/media/ - -All other directories and file are made availabe as Free Software under the GNU -General Public License (version 3 or later), unless clearly designated -otherwise. - -Feel free to use the HTML or CSS as you please. If you do use anything, a link -back to https://github.com/kennyballou/kennyballou.com would be appreciated. - -[1]: http://gohugo.io +[1]: https://www.gnu.org/licenses/gpl.html diff --git a/blag/Makefile b/blag/Makefile deleted file mode 100644 index c3483a4..0000000 --- a/blag/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -OUTPUT=`pwd`/../www/blog/ -all: build - -.PHONY: build -build: - @hugo -d ${OUTPUT} - -.PHONY: deploy -deploy: build - @rsync -avz ${OUTPUT} kennyballou.com:/srv/www/blog/ - -.PHONY: serve -serve: - -hugo -d ${OUTPUT} serve diff --git a/blag/config.yaml b/blag/config.yaml deleted file mode 100644 index 7a96afc..0000000 --- a/blag/config.yaml +++ /dev/null @@ -1,14 +0,0 @@ ---- -baseurl: "https://kennyballou.com" -MetaDataFormat: "yaml" -languageCode: "en-us" -title: "~kballou" -indexes: - tag: "tags" - topic: "topics" -permalinks: - blog: /blog/:year/:month/:slug -copyright: "This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License." -author: - name: "Kenny Ballou" -... diff --git a/blag/content/about-me.markdown b/blag/content/about-me.markdown deleted file mode 100644 index 2d2424d..0000000 --- a/blag/content/about-me.markdown +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: "About Me" -keywords: [] -tags: [] -pubdate: "2014-10-10" -date: "2015-08-24" -lastmod: "2016-01-19" -topics: [] -slug: about-me ---- - -I am a life-long learner, developer, mathematician, and overall thinker. I -enjoy solving problems and learning about technologies and discussing new and -different ideas. - -I graduated from Boise State University, majoring in Applied Mathematics and -minored in Computer Science. I've been programming professionally for nearly a -decade and currently am working as a software developer for zData Inc. in -Boise, Idaho. - -Outside of developing professionally, I am active in the open source community, -contributing where I can. - -When I am not developing, I enjoy reading, learning, and shredding the local -mountains. - -Sometimes I `:() { :|:& };:`. - -Other times I `import this`. - -The one I choose today: `fork: retry: Resource temporarily unavailable`. 
diff --git a/blag/content/blog/Spark.markdown b/blag/content/blog/Spark.markdown
deleted file mode 100644
index e3d7b6f..0000000
--- a/blag/content/blog/Spark.markdown
+++ /dev/null
@@ -1,738 +0,0 @@
---
title: "Real-Time Streaming with Apache Spark Streaming"
description: "Overview of Apache Spark and a sample Twitter Sentiment Analysis"
tags:
  - "Apache Spark"
  - "Apache Kafka"
  - "Apache"
  - "Java"
  - "Sentiment Analysis"
  - "Real-time Streaming"
  - "ZData Inc."
date: "2014-08-18"
categories:
  - "Apache"
  - "Development"
  - "Real-time Systems"
slug: "real-time-streaming-apache-spark-streaming"
---

This is the second post in a series on real-time systems tangential to the
Hadoop ecosystem. [Last time][6], we talked about [Apache Kafka][5] and Apache
Storm for use in a real-time processing engine. Today, we will be exploring
Apache Spark (Streaming) as part of a real-time processing engine.

## About Spark ##

[Apache Spark][1] is a general-purpose, large-scale processing engine,
recently fully inducted as an Apache project, and is currently under very
active development. As of this writing, Spark is at version 1.0.2, and 1.1
will be released some time soon.

Spark is intended to be a drop-in replacement for Hadoop MapReduce, providing
the benefit of improved performance. Combine Spark with its related projects
and libraries -- [Spark SQL (formerly Shark)][14], [Spark Streaming][15],
[Spark MLlib][16], [GraphX][17], among others -- and a very capable and
promising processing stack emerges. Spark is capable of reading from HBase,
Hive, Cassandra, and any HDFS data source, not to mention the many external
libraries that enable consuming data from many more sources, e.g., hooking
Apache Kafka into Spark Streaming is trivial. Further, the Spark Streaming
project provides the ability to continuously compute transformations on data.

### Resilient Distributed Datasets ###

Apache Spark's primitive type is the Resilient Distributed Dataset (RDD). All
transformations, `map`, `join`, `reduce`, etc., in Spark revolve around this
type. RDD's can be created in one of three ways: _parallelizing_ (distributing
a local dataset); reading a stable, external data source, such as an HDFS
file; or transformations on existing RDD's.

In Java, parallelizing may look like:

    List<Integer> data = Arrays.asList(1, 2, 3, 4, 5);
    JavaRDD<Integer> distData = sc.parallelize(data);

Where `sc` defines the Spark context.

Similarly, reading a file from HDFS may look like:

    JavaRDD<String> distFile = sc.textFile("hdfs:///data.txt");

The resiliency of RDD's comes from their [lazy][34] materialization and the
information required to enable this lazy nature. RDD's are not always fully
materialized, but they _do_ contain enough information (their lineage) to be
(re)created from a stable source [[Zaharia et al.][32]].

RDD's are distributed among the participating machines, and RDD
transformations are coarse-grained -- the same transformation will be applied
to _every_ element in an RDD. The number of partitions in an RDD is generally
defined by the locality of the stable source; however, the user may control
this number via repartitioning, as sketched below.
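As a rough illustration, repartitioning is a single call on the RDD; the input
path and the partition count here are arbitrary, and `sc` is the same Spark
context as above:

    // hypothetical example: spread the file's records across eight partitions
    JavaRDD<String> lines = sc.textFile("hdfs:///data.txt");
    JavaRDD<String> repartitioned = lines.repartition(8);

Reducing the number of partitions without a full shuffle can similarly be done
with `coalesce`.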
Another important property to mention: RDD's are actually immutable. This
immutability can be illustrated with [Spark's][27] Word Count example:

    JavaRDD<String> file = sc.textFile("hdfs:///data.txt");
    JavaRDD<String> words = file.flatMap(
        new FlatMapFunction<String, String>() {
            public Iterable<String> call(String line) {
                return Arrays.asList(line.split(" "));
            }
        }
    );
    JavaPairRDD<String, Integer> pairs = words.mapToPair(
        new PairFunction<String, String, Integer>() {
            public Tuple2<String, Integer> call(String word) {
                return new Tuple2<String, Integer>(word, 1);
            }
        }
    );
    JavaPairRDD<String, Integer> counts = pairs.reduceByKey(
        new Function2<Integer, Integer, Integer>() {
            public Integer call(Integer a, Integer b) { return a + b; }
        }
    );
    counts.saveAsTextFile("hdfs:///data_counted.txt");

This is the canonical word count example, but here is a brief explanation:
load a file into an RDD, split the words into a new RDD, map the words into
pairs where each word is given a count (one), then reduce the counts of each
word by a key, in this case the word itself. Notice that each operation,
`map`, `flatMap`, `reduceByKey`, creates a _new_ RDD.

To bring all these properties together, Resilient Distributed Datasets are
read-only, lazy, distributed sets of elements that can have a chain of
transformations applied to them. They facilitate resiliency by storing lineage
graphs of the transformations (to be) applied, and they [parallelize][35] the
computations by partitioning the data among the participating machines.

### Discretized Streams ###

Moving to Spark Streaming, the primitive is still RDD's. However, there is
another type for encapsulating a continuous stream of data: Discretized
Streams, or DStreams. DStreams are defined as sequences of RDD's. A DStream is
created from an input source, such as Apache Kafka, or from the transformation
of another DStream.

It turns out that programming against DStreams is _very_ similar to
programming against RDD's. The same word count code can be slightly modified
to create a streaming word counter:

    JavaReceiverInputDStream<String> lines =
        ssc.socketTextStream("localhost", 9999);
    JavaDStream<String> words = lines.flatMap(
        new FlatMapFunction<String, String>() {
            public Iterable<String> call(String line) {
                return Arrays.asList(line.split(" "));
            }
        }
    );
    JavaPairDStream<String, Integer> pairs = words.mapToPair(
        new PairFunction<String, String, Integer>() {
            public Tuple2<String, Integer> call(String word) {
                return new Tuple2<String, Integer>(word, 1);
            }
        }
    );
    JavaPairDStream<String, Integer> counts = pairs.reduceByKey(
        new Function2<Integer, Integer, Integer>() {
            public Integer call(Integer a, Integer b) { return a + b; }
        }
    );
    counts.print();

Notice that really the only change from the first example's code is the return
types. In the streaming context, transformations work on streams of RDD's;
Spark handles applying the functions (that work against data in the RDD's) to
the RDD's in the current batch/DStream.

Though programming against DStreams is similar, there are indeed some
differences as well. Chiefly, DStreams also have _stateful_ transformations.
These include sharing state between batches/intervals and modifying the
current frame when aggregating over a sliding window.

>The key idea is to treat streaming as a series of short batch jobs, and bring
>down the latency of these jobs as much as possible. This brings many of the
>benefits of batch processing models to stream processing, including clear
>consistency semantics and a new parallel recovery technique...
[[Zaharia et al.][33]]
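For instance, the streaming word count above could be kept over a sliding
window with `reduceByKeyAndWindow`; this is only a minimal sketch, and the
window and slide durations below are arbitrary:

    // hypothetical continuation of the streaming word count: counts over the
    // last 30 seconds, recomputed every 10 seconds
    JavaPairDStream<String, Integer> windowedCounts = pairs.reduceByKeyAndWindow(
        new Function2<Integer, Integer, Integer>() {
            public Integer call(Integer a, Integer b) { return a + b; }
        },
        new Duration(30000),
        new Duration(10000));
    windowedCounts.print();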
### Hadoop Requirements ###

Technically speaking, Apache Spark does [_not_][30] require Hadoop to be fully
functional. In a cluster setting, however, a means of sharing files between
tasks will need to be facilitated. This could be accomplished through [S3][8],
[NFS][9], or, more typically, HDFS.

### Running Spark Applications ###

Apache Spark applications can run in [standalone mode][18] or be managed by
[YARN][10] ([Running Spark on YARN][19]), [Mesos][11] ([Running Spark on
Mesos][20]), and even [EC2][28] ([Running Spark on EC2][29]). Furthermore, if
running under YARN or Mesos, Spark does not need to be installed to work. That
is, Spark code can execute on YARN and Mesos clusters without change to the
cluster.

### Language Support ###

Currently, Apache Spark supports the Scala, Java, and Python programming
languages, though this post will only discuss examples in Java.

### Initial Thoughts ###

Getting away from the idea of directed acyclic graphs (DAG's) may be both a
bit of a leap and a benefit. Although it is perfectly acceptable to define
Spark's transformations altogether as a DAG, this can feel awkward when
developing Spark applications. Describing the transformations as [Monadic][13]
feels much more natural. Of course, a monad structure fits the DAG analogy
quite well, especially when considered in some of the physical analogies such
as assembly lines.

Java's, and consequently Spark's, type strictness was an initial hurdle to get
accustomed to. But overall, this is good. It means the compiler will catch a
lot of issues with transformations early.

Depending on Scala's `Tuple[\d]` classes feels second-class, but this is only
a minor tedium. It's too bad current versions of Java don't have good classes
for this common structure.

YARN and Mesos integration is a very nice benefit, as it allows full-stack
analytics without oversubscribing clusters. Furthermore, it gives the ability
to add to existing infrastructure without overloading the developers and the
system administrators with _yet another_ computational suite and/or resource
manager.

On the negative side of things, dependency hell can creep into Spark projects.
Your project and Spark (and possibly Spark's dependencies) may depend on a
common artifact. If the versions don't [converge][21], many subtle problems
can emerge. There is an [experimental configuration option][4] to help
alleviate this problem; however, for me, it caused more problems than it
solved.

## Test Project: Twitter Stream Sentiment Analysis ##

To really test Spark (Streaming), a Twitter Sentiment Analysis project was
developed. It's almost a direct port of the [Storm code][3]. Though there is
an external library for hooking Spark directly into Twitter, Kafka is used so
a more precise comparison of Spark and Storm can be made.

When the processing is finished, the data are written to HDFS and posted to a
simple NodeJS application.

### Setup ###

The setup is the same as [last time][6]: a 5-node Vagrant virtual cluster,
with each node running 64-bit CentOS 6.5, given 1 core and 1024MB of RAM.
Every node is running HDFS (datanode), YARN worker nodes (nodemanager),
ZooKeeper, and Kafka. The first node, `node0`, is the namenode and resource
manager. `node0` is also running a [Docker][7] container with a NodeJS
application for reporting purposes.

### Application Overview ###

This project follows a very similar process structure to the Storm Topology
from last time.
{{< figure src="/media/SentimentAnalysisTopology.png"
    alt="Sentiment Analysis Topology" >}}

However, each node in the above graph is actually a transformation on the
current DStream and not an individual process (or group of processes).

This test project similarly uses the same [simple Kafka producer][22]
developed previously. This Kafka producer will be how data are ingested by the
system.

[A lot of this overview will be a rehashing of last time.]

#### Kafka Receiver Stream ####

The data processed are received from a Kafka stream, implemented via the
[external Kafka][23] library. This process simply creates a connection to the
Kafka broker(s), consuming messages from the given set of topics.

##### Stripping Kafka Message IDs #####

It turns out the messages from Kafka are returned as tuples, more specifically
pairs, with the message ID and the message content. Before continuing, the
message ID is stripped and the Twitter JSON data is passed down the pipeline.

#### Twitter Data JSON Parsing ####

As was the case last time, the important parts (tweet ID, tweet text, and
language code) need to be extracted from the JSON. Furthermore, this project
only parses English tweets. Non-English tweets are filtered out at this stage.

#### Filtering and Stemming ####

Many tweets contain messy or otherwise unnecessary characters and punctuation
that can be safely ignored. Moreover, there may also be many common words that
cannot be reliably scored either positively or negatively. At this stage,
these symbols and _stop words_ should be filtered.

#### Classifiers ####

Both the Positive classifier and the Negative classifier are in separate `map`
transformations. The implementation of both follows the [Bag-of-words][25]
model.

#### Joining and Scoring ####

Because the classifiers are done separately and a join is contrived, the next
step is to join the classifier scores together and actually declare a winner.
It turns out this is quite trivial to do in Spark.

#### Reporting: HDFS and HTTP POST ####

Finally, once the tweets are joined and scored, the scores need to be
reported. This is accomplished by writing the final tuples to HDFS and posting
a JSON object of the tuple to a simple NodeJS application.

This process turned out to not be as awkward as was the case with Storm. The
`foreachRDD` function of DStreams is a natural way to do side-effect-inducing
operations that don't necessarily transform the data.

### Implementing the Kafka Producer ###

See the [post][6] from last time for the details of the Kafka producer; this
has not changed.

### Implementing the Spark Streaming Application ###

Diving into the code, here are some of the primary aspects of this project.
The full source of this test application can be found on [Github][24].

#### Creating Spark Context, Wiring Transformation Chain ####

The Spark context, the data source, and the transformations need to be
defined. After that, the context needs to be started.
This is all accomplished with the following code:

    SparkConf conf = new SparkConf()
        .setAppName("Twitter Sentiment Analysis");

    if (args.length > 0)
        conf.setMaster(args[0]);
    else
        conf.setMaster("local[2]");

    JavaStreamingContext ssc = new JavaStreamingContext(
        conf,
        new Duration(2000));

    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    topicMap.put(KAFKA_TOPIC, KAFKA_PARALLELIZATION);

    JavaPairReceiverInputDStream<String, String> messages =
        KafkaUtils.createStream(
            ssc,
            Properties.getString("rts.spark.zkhosts"),
            "twitter.sentimentanalysis.kafka",
            topicMap);

    JavaDStream<String> json = messages.map(
        new Function<Tuple2<String, String>, String>() {
            public String call(Tuple2<String, String> message) {
                return message._2();
            }
        }
    );

    JavaPairDStream<Long, String> tweets = json.mapToPair(
        new TwitterFilterFunction());

    JavaPairDStream<Long, String> filtered = tweets.filter(
        new Function<Tuple2<Long, String>, Boolean>() {
            public Boolean call(Tuple2<Long, String> tweet) {
                return tweet != null;
            }
        }
    );

    JavaDStream<Tuple2<Long, String>> tweetsFiltered = filtered.map(
        new TextFilterFunction());

    tweetsFiltered = tweetsFiltered.map(
        new StemmingFunction());

    JavaPairDStream<Tuple2<Long, String>, Float> positiveTweets =
        tweetsFiltered.mapToPair(new PositiveScoreFunction());

    JavaPairDStream<Tuple2<Long, String>, Float> negativeTweets =
        tweetsFiltered.mapToPair(new NegativeScoreFunction());

    JavaPairDStream<Tuple2<Long, String>, Tuple2<Float, Float>> joined =
        positiveTweets.join(negativeTweets);

    JavaDStream<Tuple4<Long, String, Float, Float>> scoredTweets =
        joined.map(new Function<Tuple2<Tuple2<Long, String>,
                                       Tuple2<Float, Float>>,
                                Tuple4<Long, String, Float, Float>>() {
            public Tuple4<Long, String, Float, Float> call(
                Tuple2<Tuple2<Long, String>, Tuple2<Float, Float>> tweet)
            {
                return new Tuple4<Long, String, Float, Float>(
                    tweet._1()._1(),
                    tweet._1()._2(),
                    tweet._2()._1(),
                    tweet._2()._2());
            }
        });

    JavaDStream<Tuple5<Long, String, Float, Float, String>> result =
        scoredTweets.map(new ScoreTweetsFunction());

    result.foreachRDD(new FileWriter());
    result.foreachRDD(new HTTPNotifierFunction());

    ssc.start();
    ssc.awaitTermination();

Some of the more trivial transforms are defined in-line. The others are defined
in their respective files.

#### Twitter Data Filter / Parser ####

Parsing Twitter JSON data is one of the first transformations and is
accomplished with the help of the [JacksonXML Databind][26] library.

    JsonNode root = mapper.readValue(tweet, JsonNode.class);
    long id;
    String text;
    if (root.get("lang") != null &&
        "en".equals(root.get("lang").textValue()))
    {
        if (root.get("id") != null && root.get("text") != null)
        {
            id = root.get("id").longValue();
            text = root.get("text").textValue();
            return new Tuple2<Long, String>(id, text);
        }
        return null;
    }
    return null;

The `mapper` (`ObjectMapper`) object is defined at the class level so it is not
recreated _for each_ RDD in the DStream, a minor optimization.

You may recall, this is essentially the same code as [last time][6]. The only
real difference is that the tuple is returned instead of being emitted. Because
certain situations (e.g., non-English tweet, malformed tweet) return null, the
nulls will need to be filtered out.
Thankfully, Spark provides a simple way to accomplish this:

    JavaPairDStream<Long, String> filtered = tweets.filter(
        new Function<Tuple2<Long, String>, Boolean>() {
            public Boolean call(Tuple2<Long, String> tweet) {
                return tweet != null;
            }
        }
    );

#### Text Filtering ####

As mentioned before, punctuation and other symbols are simply discarded as they
provide little to no benefit to the classifiers:

    String text = tweet._2();
    text = text.replaceAll("[^a-zA-Z\\s]", "").trim().toLowerCase();
    return new Tuple2<Long, String>(tweet._1(), text);

Similarly, common words should be discarded as well:

    String text = tweet._2();
    List<String> stopWords = StopWords.getWords();
    for (String word : stopWords)
    {
        text = text.replaceAll("\\b" + word + "\\b", "");
    }
    return new Tuple2<Long, String>(tweet._1(), text);

#### Positive and Negative Scoring ####

Each classifier is defined in its own class. Both classifiers are _very_
similar in definition.

The positive classifier is primarily defined by:

    String text = tweet._2();
    Set<String> posWords = PositiveWords.getWords();
    String[] words = text.split(" ");
    int numWords = words.length;
    int numPosWords = 0;
    for (String word : words)
    {
        if (posWords.contains(word))
            numPosWords++;
    }
    return new Tuple2<Tuple2<Long, String>, Float>(
        new Tuple2<Long, String>(tweet._1(), tweet._2()),
        (float) numPosWords / numWords
    );

And the negative classifier:

    String text = tweet._2();
    Set<String> negWords = NegativeWords.getWords();
    String[] words = text.split(" ");
    int numWords = words.length;
    int numNegWords = 0;
    for (String word : words)
    {
        if (negWords.contains(word))
            numNegWords++;
    }
    return new Tuple2<Tuple2<Long, String>, Float>(
        new Tuple2<Long, String>(tweet._1(), tweet._2()),
        (float) numNegWords / numWords
    );

Because both are implementing a `PairFunction`, a join situation is contrived.
However, this could _easily_ be defined differently such that one classifier is
computed, then the next, without ever needing to join the two together.

#### Joining ####

It turns out, joining in Spark is very easy to accomplish. So easy, in fact,
that it can be handled with virtually no code at all:

    JavaPairDStream<Tuple2<Long, String>, Tuple2<Float, Float>> joined =
        positiveTweets.join(negativeTweets);

But because working with a tuple of nested tuples seems unwieldy, transform it
to a 4-element tuple:

    public Tuple4<Long, String, Float, Float> call(
        Tuple2<Tuple2<Long, String>, Tuple2<Float, Float>> tweet)
    {
        return new Tuple4<Long, String, Float, Float>(
            tweet._1()._1(),
            tweet._1()._2(),
            tweet._2()._1(),
            tweet._2()._2());
    }

#### Scoring: Declaring Winning Class ####

Declaring the winning class is a matter of a simple map: compare each class's
score and take the greater:

    String score;
    if (tweet._3() >= tweet._4())
        score = "positive";
    else
        score = "negative";
    return new Tuple5<Long, String, Float, Float, String>(
        tweet._1(),
        tweet._2(),
        tweet._3(),
        tweet._4(),
        score);

This declarer is optimistic about the neutral case (a tie is scored as
positive) but is otherwise very straightforward.
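The in-line snippets above show only the bodies of each transformation; the
named classes wrap those bodies in Spark `Function` implementations. As a point
of reference, a minimal sketch of the final scoring step as such a class (the
name `ScoreTweetsFunction` comes from the wiring code earlier; the exact shape
here is illustrative, not the project's verbatim source):

    import org.apache.spark.api.java.function.Function;
    import scala.Tuple4;
    import scala.Tuple5;

    public class ScoreTweetsFunction implements
        Function<Tuple4<Long, String, Float, Float>,
                 Tuple5<Long, String, Float, Float, String>> {
        public Tuple5<Long, String, Float, Float, String> call(
            Tuple4<Long, String, Float, Float> tweet)
        {
            // Ties go to the positive class, as noted above.
            String score = tweet._3() >= tweet._4() ? "positive" : "negative";
            return new Tuple5<Long, String, Float, Float, String>(
                tweet._1(), tweet._2(), tweet._3(), tweet._4(), score);
        }
    }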
- -#### Reporting the Results #### - -Finally, the pipeline completes with writing the results to HDFS: - - if (rdd.count() <= 0) return null; - String path = Properties.getString("rts.spark.hdfs_output_file") + - "_" + - time.milliseconds(); - rdd.saveAsTextFile(path); - -And sending POST request to a NodeJS application: - - rdd.foreach(new SendPostFunction()); - -Where `SendPostFunction` is primarily given by: - - String webserver = Properties.getString("rts.spark.webserv"); - HttpClient client = new DefaultHttpClient(); - HttpPost post = new HttpPost(webserver); - String content = String.format( - "{\"id\": \"%d\", " + - "\"text\": \"%s\", " + - "\"pos\": \"%f\", " + - "\"neg\": \"%f\", " + - "\"score\": \"%s\" }", - tweet._1(), - tweet._2(), - tweet._3(), - tweet._4(), - tweet._5()); - - try - { - post.setEntity(new StringEntity(content)); - HttpResponse response = client.execute(post); - org.apache.http.util.EntityUtils.consume(response.getEntity()); - } - catch (Exception ex) - { - Logger LOG = Logger.getLogger(this.getClass()); - LOG.error("exception thrown while attempting to post", ex); - LOG.trace(null, ex); - } - -Each file written to HDFS _will_ have data in it, but the data written will be -small. A better batching procedure should be implemented so the files written -match the HDFS block size. - -Similarly, a POST request is opened _for each_ scored tweet. This can be -expensive on both the Spark Streaming batch timings and the web server -receiving the requests. Batching here could similarly improve overall -performance of the system. - -That said, writing these side-effects this way fits very naturally into the -Spark programming style. - -## Summary ## - -Apache Spark, in combination with Apache Kafka, has some amazing potential. And -not only in the Streaming context, but as a drop-in replacement for -traditional Hadoop MapReduce. This combination makes it a very good candidate -for a part in an analytics engine. - -Stay tuned, as the next post will be a more in-depth comparison between Apache -Spark and Apache Storm. 
- -## Related Links / References ## - -[1]: http://spark.apache.org/ - -* [Apache Spark][1] - -[2]: http://inside-bigdata.com/2014/07/15/theres-spark-theres-fire-state-apache-spark-2014/ - -* [State of Apache Spark 2014][2] - -[3]: https://github.com/zdata-inc/StormSampleProject - -* [Storm Sample Project][3] - -[4]: https://issues.apache.org/jira/browse/SPARK-939 - -* [SPARK-939][4] - -[5]: http://kafka.apache.org - -* [Apache Spark][5] - -[6]: https://kennyballou.com/blog/2014/07/real-time-streaming-storm-and-kafka - -* [Real-Time Streaming with Apache Storm and Apache Kafka][6] - -[7]: http://www.docker.io/ - -* [Docker IO Project Page][7] - -[8]: http://aws.amazon.com/s3/ - -* [Amazon S3][8] - -[9]: http://en.wikipedia.org/wiki/Network_File_System - -* [Network File System (NFS)][9] - -[10]: http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/YARN.html - -* [Hadoop YARN][10] - -[11]: http://mesos.apache.org - -* [Apache Mesos][11] - -[12]: http://spark.apache.org/docs/latest/streaming-programming-guide.html - -* [Spark Streaming Programming Guide][12] - -[13]: http://en.wikipedia.org/wiki/Monad_(functional_programming) - -* [Monad][13] - -[14]: http://spark.apache.org/sql/ - -* [Spark SQL][14] - -[15]: http://spark.apache.org/streaming/ - -* [Spark Streaming][15] - -[16]: http://spark.apache.org/mllib/ - -* [MLlib][16] - -[17]: http://spark.apache.org/graphx/ - -* [GraphX][17] - -[18]: http://spark.apache.org/docs/latest/spark-standalone.html - -* [Spark Standalone Mode][18] - -[19]: http://spark.apache.org/docs/latest/running-on-yarn.html - -* [Running on YARN][19] - -[20]: http://spark.apache.org/docs/latest/running-on-mesos.html - -* [Running on Mesos][20] - -[21]: http://cupofjava.de/blog/2013/02/01/fight-dependency-hell-in-maven/ - -* [Fight Dependency Hell in Maven][21] - -[22]: https://github.com/zdata-inc/SimpleKafkaProducer - -* [Simple Kafka Producer][22] - -[23]: https://github.com/apache/spark/tree/master/external/kafka - -* [Spark: External Kafka Library][23] - -[24]: https://github.com/zdata-inc/SparkSampleProject - -* [Spark Sample Project][24] - -[25]: http://en.wikipedia.org/wiki/Bag-of-words_model - -* [Wikipedia: Bag-of-words][25] - -[26]: https://github.com/FasterXML/jackson-databind - -* [Jackson XML Databind Project][26] - -[27]: http://spark.apache.org/docs/latest/programming-guide.html - -* [Spark Programming Guide][27] - -[28]: http://aws.amazon.com/ec2/ - -* [Amazon EC2][28] - -[29]: http://spark.apache.org/docs/latest/ec2-scripts.html - -* [Running Spark on EC2][29] - -[30]: http://spark.apache.org/faq.html - -* [Spark FAQ][30] - -[31]: http://databricks.com/blog/2014/03/26/spark-sql-manipulating-structured-data-using-spark-2.html - -* [Future of Shark][31] - -[32]: https://www.usenix.org/system/files/conference/nsdi12/nsdi12-final138.pdf - -* [Resilient Distributed Datasets: A Fault-Tolerant Abstraction for In-Memory Cluster Computing (PDF)][32] - -[33]: https://www.usenix.org/system/files/conference/hotcloud12/hotcloud12-final28.pdf - -* [Discretized Streams: An Efficient and Fault-Tolerant Model for Stream Processing on Large Clusters (PDF)][33] - -[34]: http://en.wikipedia.org/wiki/Lazy_evaluation - -* [Wikipedia: Lazy evaluation][34] - -[35]: http://en.wikipedia.org/wiki/Data_parallelism - -* [Wikipedia: Data Parallelism][35] diff --git a/blag/content/blog/Storm-vs-Spark.markdown b/blag/content/blog/Storm-vs-Spark.markdown deleted file mode 100644 index e5f7776..0000000 --- a/blag/content/blog/Storm-vs-Spark.markdown +++ /dev/null @@ 
-1,461 +0,0 @@ ---- -title: "Apache Storm and Apache Spark Streaming" -description: "Comparison of Apache Storm and Apache Spark Streaming" -tags: - - "Apache Storm" - - "Apache Spark" - - "Apache" - - "Real-time Streaming" - - "ZData Inc." -date: "2014-09-08" -categories: - - "Apache" - - "Development" - - "Real-time Systems" -slug: "apache-storm-and-apache-spark" ---- - -This is the last post in the series on real-time systems. In the [first -post][3] we discussed [Apache Storm][1] and [Apache Kafka][5]. In the [second -post][4] we discussed [Apache Spark (Streaming)][3]. In both posts we examined -a small Twitter Sentiment Analysis program. Today, we will be reviewing both -systems: how they compare and how they contrast. - -The intention is not to cast judgment over one project or the other, but rather -to exposit the differences and similarities. Any judgments made, subtle or not, -are mistakes in exposition and/ or organization and are not actual endorsements -of either project. - -## Apache Storm ## - -"Storm is a distributed real-time computation system" [[1][1]]. Apache Storm is -a [task parallel][7] continuous computational engine. It defines its workflows -in Directed Acyclic Graphs (DAG's) called "topologies". These topologies run -until shutdown by the user or encountering an unrecoverable failure. - -Storm does not natively run on top of typical Hadoop clusters, it uses -[Apache ZooKeeper][8] and its own master/ minion worker processes to -coordinate topologies, master and worker state, and the message guarantee -semantics. That said, both [Yahoo!][9] and [Hortonworks][10] are working on -providing libraries for running Storm topologies on top of Hadoop 2.x YARN -clusters. Furthermore, Storm can run on top of the [Mesos][11] scheduler as -well, [natively][12] and with help from the [Marathon][13] framework. - -Regardless though, Storm can certainly still consume files from HDFS and/ or -write files to HDFS. - -## Apache Spark (Streaming) ## - -"Apache Spark is a fast and general purpose engine for large-scale data -processing" [[2][2]]. [Apache Spark][2] is a [data parallel][8] general purpose -batch processing engine. Workflows are defined in a similar and reminiscent -style of MapReduce, however, is much more capable than traditional Hadoop -MapReduce. Apache Spark has its Streaming API project that allows for -continuous processing via short interval batches. Similar to Storm, Spark -Streaming jobs run until shutdown by the user or encounter an unrecoverable -failure. - -Apache Spark does not itself require Hadoop to operate. However, its data -parallel paradigm requires a shared filesystem for optimal use of stable data. -The stable source can range from [S3][14], [NFS][15], or, more typically, -[HDFS][16]. - -Executing Spark applications does not _require_ Hadoop YARN. Spark has its own -standalone master/ server processes. However, it is common to run Spark -applications using YARN containers. Furthermore, Spark can also run on Mesos -clusters. - -## Development ## - -As of this writing, Apache Spark is a full, top level Apache project. Whereas -Apache Storm is currently undergoing incubation. Moreover, the latest stable -version of Apache Storm is `0.9.2` and the latest stable version of Apache -Spark is `1.0.2` (with `1.1.0` to be released in the coming weeks). Of course, -as the Apache Incubation reminder states, this does not strictly reflect -stability or completeness of either project. It is, however, a reflection to -the state of the communities. 
Apache Spark operations and its process are -endorsed by the [Apache Software Foundation][27]. Apache Storm is working on -stabilizing its community and development process. - -Spark's `1.x` version does state that the API has stabilized and will not be -doing major changes undermining backward compatibility. Implicitly, Storm has -no guaranteed stability in its API, however, it is [running in production for -many different companies][34]. - -### Implementation Language ### - -Both Apache Spark and Apache Storm are implemented in JVM based languages: -[Scala][19] and [Clojure][20], respectively. - -Scala is a functional meets object-oriented language. In other words, the -language carries ideas from both the functional world and the object-oriented -world. This yields an interesting mix of code reusability, extensibility, and -higher-order functions. - -Clojure is a dialect of [Lisp][21] targeting the JVM providing the Lisp -philosophy: code-as-data and providing the rich macro system typical of Lisp -languages. Clojure is predominately functional in nature, however, if state or -side-effects are required, they are facilitated with a transactional memory -model, aiding in making multi-threaded based applications consistent and safe. - -#### Message Passing Layer #### - -Until version `0.9.x`, Storm was using the Java library [JZMQ][22] for -[ZeroMQ][23] messages. However, Storm has since moved the default messaging -layer to [Netty][24] with efforts from [Yahoo!][25]. Although Netty is now -being used by default, users can still use ZeroMQ, if desired, since the -migration to Netty was intended to also make the message layer pluggable. - -Spark, on the other hand, uses a combination of [Netty][24] and [Akka][26] for -distributing messages throughout the executors. - -### Commit Velocity ### - -As a reminder, these data are included not to cast judgment on one project or -the other, but rather to exposit the fluidness of each project. The continuum -of the dynamics of both projects can be used as an argument for or against, -depending on application requirements. If rigid stability is a strong -requirement, arguing for a slower commit velocity may be appropriate. - -Source of the following statistics were taken from the graphs at -[GitHub](https://github.com/) and computed from [this script][38]. - -#### Spark Commit Velocity #### - -Examining the graphs from -[GitHub](https://github.com/apache/spark/graphs/commit-activity), over the last -month (as of this writing), there have been over 330 commits. The previous -month had about 340. - -#### Storm Commit Velocity #### - -Again examining the commit graphs from -[GitHub](https://github.com/apache/incubator-storm/graphs/commit-activity), -over the last month (as of this writing), there have been over 70 commits. The -month prior had over 130. - -### Issue Velocity ### - -Sourcing the summary charts from JIRA, we can see that clearly Spark has a huge -volume of issues reported and closed in the last 30 days. Storm, roughly, an -order of magnitude less. 
- -Spark Open and Closed JIRA Issues (last 30 days): - -{{< figure src="/media/spark_issues_chart.png" - link="https://issues.apache.org/jira/browse/SPARK/" - alt="Apache Spark JIRA issues" >}} - -Storm Open and Closed JIRA Issues (last 30 days): - -{{< figure src="/media/storm_issues_chart.png" - link="https://issues.apache.org/jira/browse/STORM/" - alt="Apache Storm JIRA issues" >}} - -### Contributor/ Community Size ### - -#### Storm Contributor Size #### - -Sourcing the reports from -[GitHub](https://github.com/apache/incubator-storm/graphs/contributors), Storm -has over a 100 contributors. This number, though, is just the unique number of -people who have committed at least one patch. - -Over the last 60 days, Storm has seen 34 unique contributors and 16 over the -last 30. - -#### Spark Contributor Size #### - -Similarly sourcing the reports from [GitHub](https://github.com/apache/spark), -Spark has roughly 280 contributors. A similar note as before must be made about -this number: this is the number of at least one patch contributors to the -project. - -Apache Spark has had over 140 contributors over the last 60 days and 94 over -the last 30 days. - -## Development Friendliness ## - -### Developing for Storm ### - -* Describing the process structure with DAG's feels natural to the - [processing model][7]. Each node in the graph will transform the data in a - certain way, and the process continues, possibly disjointly. - -* Storm tuples, the data passed between nodes in the DAG, have a very natural - interface. However, this comes at a cost to compile-time type safety. - -### Developing for Spark ### - -* Spark's monadic expression of transformations over the data similarly feels - natural in this [processing model][6]; this falls in line with the idea - that RDD's are lazy and maintain transformation lineages, rather than - actuallized results. - -* Spark's use of Scala Tuples can feel awkward in Java, and this awkwardness - is only exacerbated with the nesting of generic types. However, this - awkwardness does come with the benefit of compile-time type checks. - - - Furthermore, until Java 1.8, anonymous functions are inherently - awkward. - - - This is probably a non-issue if using Scala. - -## Installation / Administration ## - -Installation of both Apache Spark and Apache Storm are relatively straight -forward. Spark may be simpler in some regards, however, since it technically -does not _need_ to be installed to function on YARN or Mesos clusters. The -Spark application will just require the Spark assembly be present in the -`CLASSPATH`. - -Storm, on the other hand, requires ZooKeeper to be properly installed and -running on top of the regular Storm binaries that must be installed. -Furthermore, like ZooKeeper, Storm should run under [supervision][35]; -installation of a supervisor service, e.g., [supervisord][28], is recommended. - -With respect to installation, supporting projects like Apache Kafka are out of -scope and have no impact on the installation of either Storm or Spark. - -## Processing Models ## - -Comparing Apache Storm and Apache Spark's Streaming, turns out to be a bit -challenging. One is a true stream processing engine that can do micro-batching, -the other is a batch processing engine which micro-batches, but cannot perform -streaming in the strictest sense. Furthermore, the comparison between streaming -and batching isn't exactly a subtle difference, these are fundamentally -different computing ideas. 
- -### Batch Processing ### - -[Batch processing][31] is the familiar concept of processing data en masse. The -batch size could be small or very large. This is the processing model of the -core Spark library. - -Batch processing excels at processing _large_ amounts of stable, existing data. -However, it generally incurs a high-latency and is completely unsuitable for -incoming data. - -### Event-Stream Processing ### - -[Stream processing][32] is a _one-at-a-time_ processing model; a datum is -processed as it arrives. The core Storm library follows this processing model. - -Stream processing excels at computing transformations as data are ingested with -sub-second latencies. However, with stream processing, it is incredibly -difficult to process stable data efficiently. - -### Micro-Batching ### - -Micro-batching is a special case of batch processing where the batch size is -orders smaller. Spark Streaming operates in this manner as does the Storm -[Trident API][33]. - -Micro-batching seems to be a nice mix between batching and streaming. However, -micro-batching incurs a cost of latency. If sub-second latency is paramount, -micro-batching will typically not suffice. On the other hand, micro-batching -trivially gives stateful computation, making [windowing][37] an easy task. - -## Fault-Tolerance / Message Guarantees ## - -As a result of each project's fundamentally different processing models, the -fault-tolerance and message guarantees are handled differently. - -### Delivery Semantics ### - -Before diving into each project's fault-tolerance and message guarantees, here -are the common delivery semantics: - -* At most once: messages may be lost but never redelivered. - -* At least once: messages will never be lost but may be redelivered. - -* Exactly once: messages are never lost and never redelivered, perfect - message delivery. - -### Apache Storm ### - -To provide fault-tolerant messaging, Storm has to keep track of each and every -record. By default, this is done with at least once delivery semantics. -Storm can be configured to provide at most once and exactly once. The delivery -semantics offered by Storm can incur latency costs; if data loss in the stream -is acceptable, at most once delivery will improve performance. - -### Apache Spark Streaming ### - -The resiliency built into Spark RDD's and the micro-batching yields a trivial -mechanism for providing fault-tolerance and message delivery guarantees. That -is, since Spark Streaming is just small-scale batching, exactly once delivery -is a trivial result for each batch; this is the _only_ delivery semantic -available to Spark. However some failure scenarios of Spark Streaming degrade -to at least once delivery. - -## Applicability ## - -### Apache Storm ### - -Some areas where Storm excels include: near real-time analytics, natural -language processing, data normalization and [ETL][36] transformations. It also -stands apart from traditional MapReduce and other course-grained technologies -yielding fine-grained transformations allowing very flexible processing -topologies. - -### Apache Spark Streaming ### - -Spark has an excellent model for performing iterative machine learning and -interactive analytics. But Spark also excels in some similar areas of Storm -including near real-time analytics, ingestion. - -## Final Thoughts ## - -Generally, the requirements will dictate the choice. 
However, here are some -major points to consider when choosing the right tool: - -* Latency: Is the performance of the streaming application paramount? Storm - can give sub-second latency much more easily and with less restrictions - than Spark Streaming. - -* Development Cost: Is it desired to have similar code bases for batch - processing _and_ stream processing? With Spark, batching and streaming are - _very_ similar. Storm, however, departs dramatically from the MapReduce - paradigm. - -* Message Delivery Guarantees: Is there high importance on processing _every_ - single record, or is some nominal amount of data loss acceptable? - Disregarding everything else, Spark trivially yields perfect, exactly once - message delivery. Storm can provide all three delivery semantics, but - getting perfect exactly once message delivery requires more effort to - properyly achieve. - -* Process Fault Tolerance: Is high-availability of primary concern? Both - systems actually handle fault-tolerance of this kind really well and in - relatively similar ways. - - - Production Storm clusters will run Storm processes under - [supervision][35]; if a process fails, the supervisor process will - restart it automatically. State management is handled through - ZooKeeper. Processes restarting will reread the state from ZooKeeper on - an attempt to rejoin the cluster. - - - Spark handles restarting workers via the resource manager: YARN, Mesos, - or its standalone manager. Spark's standalone resource manager handles - master node failure with standby-masters and ZooKeeper. Or, this can be - handled more primatively with just local filesystem state - checkpointing, not typically recommended for production environments. - -Both Apache Spark Streaming and Apache Storm are great solutions that solve the -streaming ingestion and transformation problem. Either system can be a great -choice for part of an analytics stack. Choosing the right one is simply a -matter of answering the above questions. - -## References ## - -[spark_jira_issues]: https://kennyballou.com/media/spark_issues_chart.png - -[storm_jira_issues]: https://kennyballou.com/media/storm_issues_chart.png - -[1]: http://storm.incubator.apache.org/documentation/Home.html - -* [Apache Storm Home Page][1] - -[2]: http://spark.apache.org - -* [Apache Spark][2] - -[3]: http://www.zdatainc.com/2014/07/real-time-streaming-apache-storm-apache-kafka/ - -* [Real Time Streaming with Apache Storm and Apache Kafka][3] - -[4]: http://www.zdatainc.com/2014/08/real-time-streaming-apache-spark-streaming/ - -* [Real Time Streaming with Apache Spark (Streaming)][4] - -[5]: http://kafka.apache.org/ - -* [Apache Kafka][5] - -[6]: http://en.wikipedia.org/wiki/Data_parallelism - -* [Wikipedia: Data Parallelism][6] - -[7]: http://en.wikipedia.org/wiki/Task_parallelism - -* [Wikipedia: Task Parallelism][7] - -[8]: http://zookeeper.apache.org - -* [Apache ZooKeeper][8] - -[9]: https://github.com/yahoo/storm-yarn - -* [Yahoo! 
Storm-YARN][9] - -[10]: http://hortonworks.com/kb/storm-on-yarn-install-on-hdp2-beta-cluster/ - -* [Hortonworks: Storm on YARN][10] - -[11]: http://mesos.apache.org - -* [Apache Mesos][11] - -[12]: https://mesosphere.io/learn/run-storm-on-mesos/ - -* [Run Storm on Mesos][12] - -[13]: https://github.com/mesosphere/marathon - -* [Marathon][13] - -[14]: http://aws.amazon.com/s3/ - -[15]: http://en.wikipedia.org/wiki/Network_File_System - -[16]: http://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsUserGuide.html - -[17]: https://issues.apache.org/jira/browse/STORM/ - -[18]: https://issues.apache.org/jira/browse/SPARK/ - -[19]: http://www.scala-lang.org/ - -[20]: http://clojure.org/ - -[21]: http://en.wikipedia.org/wiki/Lisp_(programming_language) - -[22]: https://github.com/zeromq/jzmq - -[23]: http://zeromq.org/ - -[24]: http://netty.io/ - -[25]: http://yahooeng.tumblr.com/post/64758709722/making-storm-fly-with-netty - -[26]: http://akka.io - -[27]: http://www.apache.org/ - -[28]: http://supervisord.org - -[29]: http://xinhstechblog.blogspot.com/2014/06/storm-vs-spark-streaming-side-by-side.html - -* [Storm vs Spark Streaming: Side by Side][29] - -[30]: http://www.slideshare.net/ptgoetz/apache-storm-vs-spark-streaming - -* [Storm vs Spark Streaming (Slideshare)][30] - -[31]: http://en.wikipedia.org/wiki/Batch_processing - -[32]: http://en.wikipedia.org/wiki/Event_stream_processing - -[33]: https://storm.incubator.apache.org/documentation/Trident-API-Overview.html - -[34]: http://storm.incubator.apache.org/documentation/Powered-By.html - -[35]: http://en.wikipedia.org/wiki/Process_supervision - -[36]: http://en.wikipedia.org/wiki/Extract,_transform,_load - -[37]: http://en.wikipedia.org/wiki/Window_function_(SQL)#Window_function - -[38]: https://gist.github.com/kennyballou/c6ff37e5eef6710794a6 diff --git a/blag/content/blog/Storm.markdown b/blag/content/blog/Storm.markdown deleted file mode 100644 index 7223738..0000000 --- a/blag/content/blog/Storm.markdown +++ /dev/null @@ -1,593 +0,0 @@ ---- -title: "Real-Time Streaming with Apache Storm and Apache Kafka" -descritption: "Overview of Apache Storm and sample Twitter Sentiment Analysis" -tags: - - "Apache Storm" - - "Apache Kafka" - - "Apache" - - "Java" - - "Sentiment Analysis" - - "Real-time Streaming" - - "ZData Inc." -date: "2014-07-16" -categories: - - "Apache" - - "Development" - - "Real-time Systems" -slug: "real-time-streaming-storm-and-kafka" ---- - -The following post is one in the series of real-time systems tangential -to the Hadoop ecosystem.  First, exploring both Apache Storm and Apache -Kafka as a part of a real-time processing engine. These two systems work -together very well and make for an easy development experience while -still being very performant. - -## About Kafka ## - -[Apache Kafka][3] is a message queue rethought as a distributed commit log. It -follows the publish-subscribe messaging style, with speed and durability built -in. - -Kafka uses Zookeeper to share and save state between brokers. Each broker -maintains a set of partitions: primary and/ or secondary for each topic. A set -of Kafka brokers working together will maintain a set of topics. Each topic has -its partitions distributed over the participating Kafka brokers and, as of -Kafka version 0.8, the replication factor determines, intuitively, the number -of times a partition is duplicated for fault tolerance. - -While many brokered message queue systems have the broker maintain the state of -its consumers, Kafka does not. 
This frees up resources for the broker to ingest -data faster. For more information about Kafka's performance see [Benchmarking -Kafka][4]. - -### Initial Thoughts ### - -Kafka is a very promising project, with astounding throughput and one of -the easiest pieces of software I have had the joy of installing and -configuring. Although Kafka is not at the production 1.0 stable release yet, -it's well on its way. - -## About Storm ## - -[Apache Storm][1], currently in incubation, is a real-time computational engine -made available under the free and open-source Apache version 2.0 license. It -runs continuously, consuming data from the configured sources (Spouts) and -passes the data down the processing pipeline (Bolts). Combined, Spouts and -Bolts make a Topology. A topology can be written in any language including any -JVM based language, Python, Ruby, Perl, or, with some work, even C [[2][2]]. - -### Why Storm ### - -Quoting from the project site: - -> Storm has many use cases: realtime analytics, online machine learning, -> continuous computation, distributed RPC, ETL, and more. Storm is fast: a -> benchmark clocked it at over a million tuples processed per second per node. -> It is scalable, fault-tolerant, guarantees your data will be processed, and -> is easy to set up and operate. [[1][1]] - -### Integration ### - -Storm can integrate with any queuing and any database system. In fact, there -are already quite a few existing projects for use to integrate Storm with other -projects, like Kestrel or Kafka[[5][5]]. - -### Initial Thoughts ### - -I found Storm's verbiage around the computational pipeline to fit my mental -model very well, thinking about streaming computational processes as directed -acyclic graphs makes a lot of intuitive sense. That said, although I haven't -been developing against Storm for very long, I do find some integration tasks -to be slightly awkward. For example, writing an HDFS file writer bolt -requires some special considerations given bolt life cycles and HDFS writing -patterns. This is really only a minor blemish however, as it only means the -developers of Storm topologies have to understand the API more intimately; -there are already common patterns emerging that should be adaptable to about -any situation [[16][16]]. - -## Test Project: Twitter Stream Sentiment Analysis ## - -To really give Storm a try, something a little more involved than just a simple -word counter is needed. Therefore, I have put together a Twitter Sentiment -Analysis topology. Though this is a good representative example of a more -complicated topology, the method used for actually scoring the Twitter data is -overly simple. - -### Setup ### - -The setup used for this demo is a 5 node Vagrant virtual cluster. Each node is -running 64 bit CentOS 6.5, given 1 core, and 1024MB of RAM. Every node is -running HDFS (datanode), Zookeeper, and Kafka. The first node, `node0`, is the -namenode, and Nimbus -- Storm's master daemon. `node0` is also running a -[Docker][7] container with a NodeJS application, part of the reporting process. -The remaining nodes, `node[1-4]`, are Storm worker nodes. Storm, Kafka, and -Zookeeper are all run under supervision via [Supervisord][6], so -High-Availability is baked into this virtual cluster. - -### Overview ### - -{{< figure src="/media/SentimentAnalysisTopology.png" - alt="Sentiment Analysis Topology">}} - -I wrote a simple Kafka producer that reads files off disk and sends them to the -Kafka cluster. 
This is how we feed the whole system and is used in lieu of -opening a stream to Twitter. - -#### Spout #### - -The orange node from the picture is our [`KafkaSpout`][8] that will be -consuming incoming messages from the Kafka brokers. - -#### Twitter Data JSON Parsing #### - -The first bolt, `2` in the image, attempts to parse the Twitter JSON data and -emits `tweet_id` and `tweet_text`. This implementation only processes English -tweets. If the topology were to be more ambitious, it may pass the language -code down and create different scoring bolts for each language. - -#### Filtering and Stemming #### - -This next bolt, `3`, performs first-round data sanitization. That is, it -removes all non-alpha characters. - -Following, the next round of data cleansing, `4`, is to remove common words -to reduce noise for the classifiers. These common words are usually referred to -as _stop words_. [_Stemming_][15], or considering suffixes to match the root, -could also be performed here, or in another, later bolt. - -`4`, when finished, will then broadcast the data to both of the classifiers. - -#### Classifiers #### - -Each classifier is defined by its own bolt. One classifier for the positive -class, `5`, and another for the negative class,`6`. - -The implementation of the classifiers follows the [Bag-of-words][12] model. - -#### Join and Scoring #### - -Next, bolt `7` joins the scores from the two previous classifiers. The -implementation of this bolt isn't perfect. For example, message transaction is -not guaranteed: if one-side of the join fails, neither side is notified. - -To finish up the scoring, bolt `8` compares the scores from the classifiers and -declares the class accordingly. - -#### Reporting: HDFS and HTTP POST #### - -Finally, the scoring bolt broadcasts off the results to a HDFS file writer -bolt, `9`, and a NodeJS notifier bolt, `10`. The HDFS bolt fills a list until -it has 1000 records in it and then spools to disk. Of course, like the join -bolt, this could be better implemented to fail input tuples if the bolt fails -to write to disk or have a timeout to write to disk after a certain -period of time. The NodeJs notifier bolt, on the other hand, sends a HTTP POST -each time a record is received. This could be batched as well, but again, this -is for demonstration purposes only. - -### Implementing the Kafka Producer ### - -Here, the main portions of the code for the Kafka producer that was used to -test our cluster are defined. 
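The `conf` object handed to the producer below is a plain Kafka 0.8
`ProducerConfig`. A rough sketch of how it might be built (the broker list and
acknowledgement settings here are placeholders, not the project's actual
configuration):

    Properties props = new Properties();  // java.util.Properties
    // Assumed broker list for this Vagrant cluster; adjust for your nodes.
    props.put("metadata.broker.list", "node0:9092,node1:9092");
    props.put("serializer.class", "kafka.serializer.StringEncoder");
    // Asynchronous, fire-and-forget sends favor throughput over durability.
    props.put("producer.type", "async");
    props.put("request.required.acks", "0");
    // kafka.producer.ProducerConfig, from the Kafka 0.8 producer API
    ProducerConfig conf = new ProducerConfig(props);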
- -In the main class, we setup the data pipes and threads: - - LOGGER.debug("Setting up streams"); - PipedInputStream send = new PipedInputStream(BUFFER_LEN); - PipedOutputStream input = new PipedOutputStream(send); - - LOGGER.debug("Setting up connections"); - LOGGER.debug("Setting up file reader"); - BufferedFileReader reader = new BufferedFileReader(filename, input); - LOGGER.debug("Setting up kafka producer"); - KafkaProducer kafkaProducer = new KafkaProducer(topic, send); - - LOGGER.debug("Spinning up threads"); - Thread source = new Thread(reader); - Thread kafka = new Thread(kafkaProducer); - - source.start(); - kafka.start(); - - LOGGER.debug("Joining"); - kafka.join(); - -The `BufferedFileReader` in its own thread reads off the data from disk: - - rd = new BufferedReader(new FileReader(this.fileToRead)); - wd = new BufferedWriter(new OutputStreamWriter(this.outputStream, ENC)); - int b = -1; - while ((b = rd.read()) != -1) - { - wd.write(b); - } - -Finally, the `KafkaProducer` sends asynchronous messages to the Kafka Cluster: - - rd = new BufferedReader(new InputStreamReader(this.inputStream, ENC)); - String line = null; - producer = new Producer(conf); - while ((line = rd.readLine()) != null) - { - producer.send(new KeyedMessage(this.topic, line)); - } - -Doing these operations on separate threads gives us the benefit of having disk -reads not block the Kafka producer or vice-versa, enabling maximum performance -tunable by the size of the buffer. - -### Implementing the Storm Topology ### - -#### Topology Definition #### - -Moving onward to Storm, here we define the topology and how each bolt will be -talking to each other: - - TopologyBuilder topology = new TopologyBuilder(); - - topology.setSpout("kafka_spout", new KafkaSpout(kafkaConf), 4); - - topology.setBolt("twitter_filter", new TwitterFilterBolt(), 4) - .shuffleGrouping("kafka_spout"); - - topology.setBolt("text_filter", new TextFilterBolt(), 4) - .shuffleGrouping("twitter_filter"); - - topology.setBolt("stemming", new StemmingBolt(), 4) - .shuffleGrouping("text_filter"); - - topology.setBolt("positive", new PositiveSentimentBolt(), 4) - .shuffleGrouping("stemming"); - topology.setBolt("negative", new NegativeSentimentBolt(), 4) - .shuffleGrouping("stemming"); - - topology.setBolt("join", new JoinSentimentsBolt(), 4) - .fieldsGrouping("positive", new Fields("tweet_id")) - .fieldsGrouping("negative", new Fields("tweet_id")); - - topology.setBolt("score", new SentimentScoringBolt(), 4) - .shuffleGrouping("join"); - - topology.setBolt("hdfs", new HDFSBolt(), 4) - .shuffleGrouping("score"); - topology.setBolt("nodejs", new NodeNotifierBolt(), 4) - .shuffleGrouping("score"); - -Notably, the data is shuffled to each bolt until except when joining, as it's -very important that the same tweets are given to the same instance of the -joining bolt. To read more about stream groupings, visit the [Storm -documentation][17]. - -#### Twitter Data Filter / Parser #### - -Parsing the Twitter JSON data is one of the first things that needs to be done. -This is accomplished with the use of the [JacksonXML Databind][11] library. 
    JsonNode root = mapper.readValue(json, JsonNode.class);
    long id;
    String text;
    if (root.get("lang") != null &&
        "en".equals(root.get("lang").textValue()))
    {
        if (root.get("id") != null && root.get("text") != null)
        {
            id = root.get("id").longValue();
            text = root.get("text").textValue();
            collector.emit(new Values(id, text));
        }
        else
            LOGGER.debug("tweet id and/ or text was null");
    }
    else
        LOGGER.debug("Ignoring non-english tweet");

As mentioned before, `tweet_id` and `tweet_text` are the only values being
emitted.

This is just using the basic `ObjectMapper` class from the Jackson Databind
library to map the simple JSON object provided by the Twitter Streaming API to
a `JsonNode`. The language code is first tested to be English, as the topology
does not support non-English tweets. The new tuple is emitted down the Storm
pipeline after ensuring the existence of the desired data, namely, `tweet_id`
and `tweet_text`.

#### Text Filtering ####

As previously mentioned, punctuation and other symbols are removed because they
are just noise to the classifiers:

    Long id = input.getLong(input.fieldIndex("tweet_id"));
    String text = input.getString(input.fieldIndex("tweet_text"));
    text = text.replaceAll("[^a-zA-Z\\s]", "").trim().toLowerCase();
    collector.emit(new Values(id, text));

_Very_ common words are also removed because they are similarly noisy to the
classifiers:

    Long id = input.getLong(input.fieldIndex("tweet_id"));
    String text = input.getString(input.fieldIndex("tweet_text"));
    List<String> stopWords = StopWords.getWords();
    for (String word : stopWords)
    {
        text = text.replaceAll(word, "");
    }
    collector.emit(new Values(id, text));

Here the `StopWords` class is a singleton holding the list of words we wish to
omit.

#### Positive and Negative Scoring ####

Since the approach for scoring is based on a very limited [Bag-of-words][12]
model, each class is put into its own bolt; this also contrives the need for a
join later.

Positive Scoring:

    Long id = input.getLong(input.fieldIndex("tweet_id"));
    String text = input.getString(input.fieldIndex("tweet_text"));
    Set<String> posWords = PositiveWords.getWords();
    String[] words = text.split(" ");
    int numWords = words.length;
    int numPosWords = 0;
    for (String word : words)
    {
        if (posWords.contains(word))
            numPosWords++;
    }
    collector.emit(new Values(id, (float) numPosWords / numWords, text));

Negative Scoring:

    Long id = input.getLong(input.fieldIndex("tweet_id"));
    String text = input.getString(input.fieldIndex("tweet_text"));
    Set<String> negWords = NegativeWords.getWords();
    String[] words = text.split(" ");
    int numWords = words.length;
    int numNegWords = 0;
    for (String word : words)
    {
        if (negWords.contains(word))
            numNegWords++;
    }
    collector.emit(new Values(id, (float) numNegWords / numWords, text));

Similar to `StopWords`, `PositiveWords` and `NegativeWords` are both singletons
maintaining lists of positive and negative words, respectively.

#### Joining Scores ####

Joining in Storm isn't the most straightforward process to implement, although
the process may be made simpler with the use of the [Trident API][13]. However,
to get a feel for what to do without Trident, and as an academic exercise, the
join is not implemented with the Trident API. On a related note, this join
isn't perfect!
It ignores a couple of issues, namely, it does not correctly -fail a tuple on a one-sided join (when tweets are received twice from the same -scoring bolt) and doesn't timeout tweets if they are left in the queue for too -long. With this in mind, here is our join: - - Long id = input.getLong(input.fieldIndex("tweet_id")); - String text = input.getString(input.fieldIndex("tweet_text")); - if (input.contains("pos_score")) - { - Float pos = input.getFloat(input.fieldIndex("pos_score")); - if (this.tweets.containsKey(id)) - { - Triple triple = this.tweets.get(id); - if ("neg".equals(triple.getCar())) - emit(collector, id, triple.getCaar(), pos, triple.getCdr()); - else - { - LOGGER.warn("one sided join attempted"); - this.tweets.remove(id); - } - } - else - this.tweets.put( - id, - new Triple("pos", pos, text)); - } - else if (input.contains("neg_score")) - { - Float neg = input.getFloat(input.fieldIndex("neg_score")); - if (this.tweets.containsKey(id)) - { - Triple triple = this.tweets.get(id); - if ("pos".equals(triple.getCar())) - emit(collector, id, triple.getCaar(), neg, triple.getCdr()); - else - { - LOGGER.warn("one sided join attempted"); - this.tweets.remove(id); - } - } - else - this.tweets.put( - id, - new Triple("neg", neg, text)); - } - -Where `emit` is defined simply by: - - private void emit( - BasicOutputCollector collector, - Long id, - String text, - float pos, - float neg) - { - collector.emit(new Values(id, pos, neg, text)); - this.tweets.remove(id); - } - -#### Deciding the Winning Class #### - -To ensure the [Single responsibility principle][14] is not violated, joining -and scoring are split into separate bolts, though the class of the tweet could -certainly be decided at the time of joining. - - Long id = tuple.getLong(tuple.fieldIndex("tweet_id")); - String text = tuple.getString(tuple.fieldIndex("tweet_text")); - Float pos = tuple.getFloat(tuple.fieldIndex("pos_score")); - Float neg = tuple.getFloat(tuple.fieldIndex("neg_score")); - String score = pos > neg ? "positive" : "negative"; - collector.emit(new Values(id, text, pos, neg, score)); - -This decider will prefer negative scores, so if there is a tie, it's -automatically handed to the negative class. - -#### Report the Results #### - -Finally, there are two bolts that will yield results: one that writes -data to HDFS, and another to send the data to a web server. 
- - Long id = input.getLong(input.fieldIndex("tweet_id")); - String tweet = input.getString(input.fieldIndex("tweet_text")); - Float pos = input.getFloat(input.fieldIndex("pos_score")); - Float neg = input.getFloat(input.fieldIndex("neg_score")); - String score = input.getString(input.fieldIndex("score")); - String tweet_score = - String.format("%s,%s,%f,%f,%s\n", id, tweet, pos, neg, score); - this.tweet_scores.add(tweet_score); - if (this.tweet_scores.size() >= 1000) - { - writeToHDFS(); - this.tweet_scores = new ArrayList(1000); - } - -Where `writeToHDFS` is primarily given by: - - Configuration conf = new Configuration(); - conf.addResource(new Path("/opt/hadoop/etc/hadoop/core-site.xml")); - conf.addResource(new Path("/opt/hadoop/etc/hadoop/hdfs-site.xml")); - hdfs = FileSystem.get(conf); - file = new Path( - Properties.getString("rts.storm.hdfs_output_file") + this.id); - if (hdfs.exists(file)) - os = hdfs.append(file); - else - os = hdfs.create(file); - wd = new BufferedWriter(new OutputStreamWriter(os, "UTF-8")); - for (String tweet_score : tweet_scores) - { - wd.write(tweet_score); - } - -And our `HTTP POST` to a web server: - - Long id = input.getLong(input.fieldIndex("tweet_id")); - String tweet = input.getString(input.fieldIndex("tweet_text")); - Float pos = input.getFloat(input.fieldIndex("pos_score")); - Float neg = input.getFloat(input.fieldIndex("neg_score")); - String score = input.getString(input.fieldIndex("score")); - HttpPost post = new HttpPost(this.webserver); - String content = String.format( - "{\"id\": \"%d\", " + - "\"text\": \"%s\", " + - "\"pos\": \"%f\", " + - "\"neg\": \"%f\", " + - "\"score\": \"%s\" }", - id, tweet, pos, neg, score); - - try - { - post.setEntity(new StringEntity(content)); - HttpResponse response = client.execute(post); - org.apache.http.util.EntityUtils.consume(response.getEntity()); - } - catch (Exception ex) - { - LOGGER.error("exception thrown while attempting post", ex); - LOGGER.trace(null, ex); - reconnect(); - } - -There are some faults to point out with both of these bolts. Namely, the HDFS -bolt tries to batch the writes into 1000 tweets, but does not keep track of the -tuples nor does it time out tuples or flush them at some interval. That is, if -a write fails or if the queue sits idle for too long, the topology is not -notified and can't resend the tuples. Similarly, the `HTTP POST`, does not -batch and sends each POST synchronously causing the bolt to block for each -message. This can be alleviated with more instances of this bolt and more web -servers to handle the increase in posts, and/ or a better batching process. - -## Summary ## - -The full source of this test application can be found on [Github][9]. - -Apache Storm and Apache Kafka both have great potential in the real-time -streaming market and have so far proven themselves to be very capable systems -for performing real-time analytics. - -Stay tuned, as the next post in this series will be an overview look at -Streaming with Apache Spark. 
- -## Related Links / References ## - -[1]: http://storm.incubator.apache.org/ - -* [Apache Storm Project Page][1] - -[2]: http://storm.incubator.apache.org/about/multi-language.html - -* [Storm Multi-Language Documentation][2] - -[3]: http://kafka.apache.org/ - -* [Apache Kafka Project Page][3] - -[4]: http://engineering.linkedin.com/kafka/benchmarking-apache-kafka-2-million-writes-second-three-cheap-machines - -* [LinkedIn Kafka Benchmarking: 2 million writes per second][4] - -[5]: http://storm.incubator.apache.org/about/integrates.html - -* [Storm Integration Documentation][5] - -[6]: http://supervisord.org/ - -* [Supervisord Project Page][6] - -[7]: http://www.docker.io/ - -* [Docker IO Project Page][7] - -[8]: https://github.com/apache/incubator-storm/tree/master/external/storm-kafka - -* [Storm-Kafka Source][8] - -[9]: https://github.com/zdata-inc/StormSampleProject - -* [Full Source of Test Project][9] - -[10]: https://wiki.apache.org/incubator/StormProposal - -* [Apache Storm Incubation Proposal][10] - -[11]: https://github.com/FasterXML/jackson-databind - -* [Jackson Databind Project Bag][11] - -[12]: http://en.wikipedia.org/wiki/Bag-of-words_model - -* [Wikipedia: Bag of words][12] - -[13]: http://storm.incubator.apache.org/documentation/Trident-API-Overview.html - -* [Storm Trident API Overview][13] - -[14]: http://en.wikipedia.org/wiki/Single_responsibility_principle - -* [Wikipedia: Single responsibility principle][14] - -[15]: http://en.wikipedia.org/wiki/Stemming - -* [Wikipedia: Stemming][15] - -[16]: http://storm.incubator.apache.org/documentation/Common-patterns.html - -* [Storm Documentation: Common Patterns][16] - -[17]: http://storm.incubator.apache.org/documentation/Concepts.html#stream-groupings - -* [Stream Groupings][17] diff --git a/blag/content/blog/coreboot-x230.markdown b/blag/content/blog/coreboot-x230.markdown deleted file mode 100644 index 5193e53..0000000 --- a/blag/content/blog/coreboot-x230.markdown +++ /dev/null @@ -1,498 +0,0 @@ ---- -title: "Coreboot for x230" -description: "Getting Coreboot onto Lenovo x230" -tags: - - "Coreboot" - - "GNU/Linux" - - "BIOS" - - "Hardware" - - "Lenovo" - - "x230" - - "Raspberry-Pi" -date: "2017-01-31" -categories: - - "Guides" - - "Notes" -slug: "coreboot-x230" ---- - -{{< - figure src="/media/coreboot-x230-1.png" - alt="Disassembled Laptop" - width="70%" ->}} - -In this post, we will go through the steps to get [coreboot][1] compiled and -installed on a [Lenovo x230][2] laptop. This is a somewhat lengthy and involved -process that is not for the faint of heart. *It is very possible to ruin or -otherwise brick your laptop performing these steps improperly or even -properly!* You have been warned. - -## Coreboot ## - -> coreboot is an extended firmware platform that delivers a lightning fast and -> secure boot experience on modern computers and embedded systems ---[coreboot project][1] - -Coreboot is an OpenSource firmware alternative that supports a number of modern -computers and embedded systems. It can replace your system's [BIOS][28] with a -faster and more secure platform. It can be preloaded with a number of -payloads, e.g., [SeaBIOS][29] or [tianocore][30], and/or it can come with some -additional payloads, e.g., [memtest86+][31]. - -## Motivation ## - -Why replace the default BIOS image in the first place? There are several -motivations for doing this. 
For one, it's [well documented][5] that Lenovo -installs a device whitelist onto its systems, disabling the computer if a -third-party peripheral is installed, which can include WiFi cards and SSD's. -If you're more adventurous and want to replace the x230 screen with that of an -x240, the whitelist will also get in the way. By replacing the BIOS entirely, -this whitelist problem will be avoided. - -Furthermore, in older laptops, x200/1 for example, it's possible to replace the -disastrous [Intel ME][6] platform. This is, unfortunately, (currently) -impossible on the x230 and later. That is, removing the ME code will make the -laptop effectively unusable. - -## Necessary Equipment ## - - -Before we go into the actual steps, let's take a moment to gather all the -necessary equipment. Disassembly is necessary because the BIOS chip is locked -and not accessible from software flashers like [flashrom][13]. However, -desoldering will not be necessary. - -* SPI Flash Programmer - - This guide will perform the ROM flashing via the GPIO headers of a - [Raspberry Pi 2][3] (RPI-1 should work, but different pinouts are - required). - -* [SOIC-8 Clip][4] - - This clip will be used for interfacing with the BIOS chip and the SPI - programmer. They are sometimes available for less (with longer shipping - times) from [eBay][20]. - -* Some [_short_ cables][21] - - These cables will connect the SOIC chip to the GPIO headers of the - Raspberry Pi. It is important that they are short, no more than 25cm or so. - -* [Plastic opening tools][27] - - After not having these for too long, I can't recommend these enough for - opening up laptops and other devices. - -* A precision Phillips screwdriver - - A percision set will be better, used for disassembling the laptop. - -* A magnifying lens - - The specific chip found in _your_ x230 may be different from mine. A - magnifying lens will be helpful in determining the exact version. - -## Disassembly and BIOS Access ## - -[Steps][11] and [manuals][12] for disassembling the laptop can be found with a -simple search. However, it's only necessary to remove the keyboard and the palm -rest to gain access to the BIOS chip. Of course, remove the battery and power -supply before opening the laptop. I personally, removed the hard drive and WiFi -card as well, I wanted nothing attached while working. - -{{< figure src="/media/coreboot-x230-2.png" alt="BIOS Chip" width="30%" >}} - -You will notice there are two chips in the above figure. The combination of -these two chips is what makes up the BIOS (and the Intel ME) for the x230. *We -will be dealing exclusively with the _top_ chip* (one closest to the screen). - -Once we have physical access to the top chip, use the magnifying glass to read -the _tiny_ print of the chip. We need to know the precise version of the chip -to remove any future guesswork from the process, especially for disaster -recovery. - -If you are unable to read the version of the chip, there are steps we can take -to proceed, but it will be far more tedious and less comfortable. - -### Connecting the Raspberry Pi to the SOIC Clip ### - -Next, we will be connecting the Raspberry Pi with the cables and clips to the -BIOS chip. - -> I found this to be the most difficult of the entire process. Finding a solid -> source for the documentation on the chip and the GPIO headers was incredibly -> difficult the first time around. - -First, get [GPIO header diagram][14] for your Raspberry Pi model. - -Next, cross-reference the header diagram with your chip's spec sheet. 
It should -be in the list at [All Data Sheet][15]. Specifically, I found mine [here][16]. -It's very likely, yours will be similar. Cross reference the "Pin -Configuration" page with the GPIO header diagram to discern the proper -connections. - -The pin arrangement that I used was the following (using the notch on the chip -for starting): - -* 1: GPIO 26 - -* 2: GPIO 19 - -* 3: Not Connected - -* 4: GPIO 17 - -* 5: GPIO 21 - -* 6: GPIO 23 - -* 7: Not Connected - -* 8: GPIO 25 - -### Using the Raspberry Pi ### - -{{< figure - src="/media/coreboot-x230-3.png" - alt="Raspberry Pi connected to x230 via SOIC clip" - width="70%" ->}} - -> Before connecting the clip, it's imperative to remove all external power -> sources. The Raspberry Pi will be providing power to the ROM chip, any -> external current can and most likely _will_ brick your laptop. - -Attach the clip to the chip and power on the Raspberry Pi. Before you are able -to read the chip, you may need to install [flashrom][13] and ensure your kernel -has SPI enabled. Most distributions will have it on by default. An easy way to -check is to list the contents of `/dev` and look for `spi` devices, since the -chip is connected, there should be one. - -Before we begin the process of flashing, let's inspect the ROM itself. First, -simply run flashrom, specifying the SPI device as the programmer: - - # flashrom --programmer linux_spi:dev=/dev/spidev0.0 - flashrom v0.9.9-r1955 on Linux 4.4.10-1-ARCH (armv7l) - flashrom is free software, get the source code at https://flashrom.org - - Calibrating delay loop... OK. - Found Macronix flash chip "MX25L3205(A)" (4096 kB, SPI) on linux_spi. - Found Macronix flash chip "MX25L3205D/MX25L3208D" (4096 kB, SPI) on linux_spi. - Found Macronix flash chip "MX25L3206E/MX25L3208E" (4096 kB, SPI) on linux_spi. - Found Macronix flash chip "MX25L3273E" (4096 kB, SPI) on linux_spi. - Multiple flash chip definitions match the detected chip(s): "MX25L3205(A)", "MX25L3205D/MX25L3208D", "MX25L3206E/MX25L3208E", "MX25L3273E" - Please specify which chip definition to use with the -c option. - -> If you are seeing numbers like 8192 kB, you're reading the wrong chip! -> Disconnect and attach to the other. - -If you were able to read the chip number, pass it along, and try again: - - # flashrom --programmer linux_spi:dev=/dev/spidev0.0 \ - --chip "MX25L3206E/MX25L3208E" - flashrom v0.9.9-r1955 on Linux 4.4.10-1-ARCH (armv7l) - flashrom is free software, get the source code at https://flashrom.org - - Calibrating delay loop... OK. - Found Macronix flash chip "MX25L3206E/MX25L3208E" (4096 kB, SPI) on linux_spi. - -Now, we will want to create a back up image of the ROM, but we also want to -verify we are reading correctly: - - # flashrom --programmer linux_spi:dev=/dev/spidev0.0 \ - --chip "MX25L3206E/MX25L3208E" \ - --read original.1.rom - flashrom v0.9.9-r1955 on Linux 4.4.10-1-ARCH (armv7l) - flashrom is free software, get the source code at https://flashrom.org - - Calibrating delay loop... OK. - Found Macronix flash chip "MX25L3206E/MX25L3208E" (4096 kB, SPI) on linux_spi. - Reading flash... done. - - # flashrom --programmer linux_spi:dev=/dev/spidev0.0 \ - --chip "MX25L3206E/MX25L3208E" \ - --read original.2.rom - flashrom v0.9.9-r1955 on Linux 4.4.10-1-ARCH (armv7l) - flashrom is free software, get the source code at https://flashrom.org - - Calibrating delay loop... OK. - Found Macronix flash chip "MX25L3206E/MX25L3208E" (4096 kB, SPI) on linux_spi. - Reading flash... done. 
- - # diff original.1.rom original.2.rom - -> Again, if the size of `original.1.rom` and `original.2.rom` are 8MB, you're -> reading the wrong chip, move the clip to the other chip and repeat the above -> steps! - -If you get no output from the last command, we should be set, or it means we're -reading both incorrectly. However, it's more likely flashrom will complain -first. - -Keep at least one of the images around just in case this fails and you need to -attempt recovery. - -> If you were unable to read the serial number off the chip, perform the read 4 -> to 8 times, once or twice for each chip type. - -## Configuration and Compilation ## - -Half the battle to getting Coreboot onto your system is properly putting -together the build-tools and compiling the coreboot image. There already exists -a [guide for configuring and building][8] the Coreboot tool-chain, but for -completeness, the basic steps will be copied here. - -> I'll assume a certain comfortability with GNU/Linux and the GNU GCC and Make -> tools. - -First up, get a copy of the [Coreboot Source][7]: - - % git clone --recursive https://review.coreboot.org/coreboot.git - -This will get the latest source code of the Coreboot project and also -initialize the project's submodules. - -Next, we will need to download the blobs archive: - - % curl -SLO https://www.coreboot.org/releases/coreboot-blobs-4.5.tar.xz - -> The link can be found from the [Coreboot Downloads][9] page. - -Now, unpack the blobs into the `coreboot/3rdparty/blobs` folder: - - % tar -xf coreboot-blobs-4.5.tar.xz --strip-components=1 -C coreboot - -Now, we can move onto configuring the tool-chain, building the tool-chain, and -finally building the coreboot image itself. - -### Configuration ### - - % cd coreboot - -We'll start by configuring the compile options for coreboot: - - ± make menuconfig - -OR - - ± make nconfig - -Set the following options: - -``` -general --| - |-[*] Compress ramstage with LZMA - |-[*] Include the coreboot .config file into the ROM image -mainboard -| - |-Mainboard vendor (Lenovo) - |-Mainboard model (ThinkPad X230) - |-ROM chip size (12288 KB (12 MB)) - |-(0x100000) Size of CBFS filesystem in ROM -devices ---| - |-[*] Use native graphics initialization -generic ---| - |-[*] PS/2 keyboard init -console ---| - |-[*] Squelch AP CPUs from early console. - |-[*] Send console output to a CBMEM buffer - |-[*] Send POST codes to an external device - |-[*] Send POST codes to an IO port -sys table -| - |-[*] Generate SMBIOS tables -payload ---| - |-Add a payload (SeaBIOS) - |-SeaBIOS version (master) - |-(10) PS/2 keyboard controller initialization timeout (milliseconds) - |-[*] Hardware init during option ROM execution - |-[*] Include generated option rom that implements legacy VGA BIOS compatibility - |-[*] Use LZMA compression for payloads -``` - -These configuration options were borrowed from [Unix Blather][26]. - -### Compilation ### - -> If you were thinking of compiling the ROM on the Pi, I recommend you -> reconsider. If you have an exorbitant amount of time to kill, go for it, but -> you'll prefer a machine with more power. - -From here, we can build the tool-chain: - - ± make crossgcc-x64 CPUS=$(nproc) - -This will only build the tool-chain for the x64 architecture, update as -necessary. - -`CPUS=#` is used to specify the parallelization of the tool-build. This is -unfortunately different from the usual `--jobs|-j` argument of `make`, but has -the same effect. 
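
For example, on a four-core build host, the two invocations below each request
four parallel jobs; only the tool-chain targets use the `CPUS` variable. This is
purely an illustration of the flag difference described above:

    ± make crossgcc-x64 CPUS=4   # tool-chain targets: parallelism via CPUS
    ± make -j4                   # ordinary targets: parallelism via --jobs
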
- -Now, we can build the coreboot image itself: - - ± make -j$(nproc) - -This will create `build/coreboot.rom` image. - -However, this will *not* be the image we flash onto our laptop! Because the -Lenovo x230 comes with the [nasty Intel ME][6] and we built the coreboot image -using a stub for the Intel ME section, we need to create a new image that -contains only the Coreboot contents. To do this, we will use `dd` to skip the -first 8MB of the image, and only grab the last 4: - - ± dd if=build/coreboot.rom bs=1M of=/tmp/x230.rom skip=8 - -This will create a 4MB file in `/tmp/` named `x230.rom`. Finally, copy the new -image to the Raspberry Pi. - -## Flashing the New Image ## - -After the image is copied to the Pi, we can use flashrom to write the new -image: - - # flashrom --programmer linux_spi:dev=/dev/spidev0.0 - --chip "MX25L3206E/MX25L3208E" - --write /tmp/x230.rom - flashrom v0.9.9-r1955 on Linux 4.4.10-1-ARCH (armv7l) - flashrom is free software, get the source code at https://flashrom.org - - Calibrating delay loop... OK. - Found Macronix flash chip "MX25L3206E/MX25L3208E" (4096 kB, SPI) on linux_spi. - Reading old flash chip contents... done. - Erasing and writing flash chip... Erase/write done. - Verifying flash... VERIFIED. - -Flashrom will read back the new contents and verify it was successful, however, -I like the comfort of having done this myself. This can be accomplished two -ways: using flashrom's `--verify` option, or reading the image and running -`diff`: - - - # flashrom --programmer linux_spi:dev=/dev/spidev0.0 - --chip "MX25L3206E/MX25L3208E" - --verify /tmp/x230.rom - flashrom v0.9.9-r1955 on Linux 4.4.10-1-ARCH (armv7l) - flashrom is free software, get the source code at https://flashrom.org - - Calibrating delay loop... OK. - Found Macronix flash chip "MX25L3206E/MX25L3208E" (4096 kB, SPI) on linux_spi. - Reading old flash chip contents... done. - Verifying flash... VERIFIED. - -OR - - # flashrom --programmer linux_spi:dev=/dev/spidev0.0 \ - --chip "MX25L3206E/MX25L3208E" \ - --read /tmp/x230.2.rom - flashrom v0.9.9-r1955 on Linux 4.4.10-1-ARCH (armv7l) - flashrom is free software, get the source code at https://flashrom.org - - Calibrating delay loop... OK. - Found Macronix flash chip "MX25L3206E/MX25L3208E" (4096 kB, SPI) on linux_spi. - Reading flash... done. - - # diff /tmp/x230.rom /tmp/x230.2.rom - -If you get "VERIFIED" or no output, respectively, the contents of the BIOS chip -should be replaced with the Coreboot image. - -All that's next is to disconnect the chip, reassemble the laptop and hope it -works! - -## Common Problems ## - -If you're having issues flashing or reading your BIOS, check the following: - -* The chip is getting sufficient power - -* The wires used to connect the Raspberry Pi and the chip are not _too long_ - -* Make sure your pinout is correct - -For some more information, check Flashrom's [in system programming][25]. - -## Summary and Auxiliary Advice ## - -Hopefully, you're now booting into your x230 with Coreboot. Enjoy your new -BIOS, whitelist free and awesome! - -However, if you have issues, e.g., the flashing doesn't go as planned: *DO NOT -POWER OFF THE CHIP!* Get help from the [#coreboot][22] IRC channel on -[freenode][23] or [email the mailing list][24]. 
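
Should the new image fail to boot, the backup taken earlier is the way back: the
same programmer setup can write it back to the chip. A sketch, assuming the same
chip, wiring, and the `original.1.rom` backup from above:

    # flashrom --programmer linux_spi:dev=/dev/spidev0.0 \
        --chip "MX25L3206E/MX25L3208E" \
        --write original.1.rom

Once flashrom reports the write as verified, the laptop should be back on the
vendor BIOS.
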
- -## References ## - -[1]: https://www.coreboot.org - -* [coreboot homepage][1] - -[2]: http://shop.lenovo.com/us/en/laptops/thinkpad/x-series/x230/ - -* [x230 product page][2] - -[3]: https://www.raspberrypi.org/products/raspberry-pi-2-model-b/ - -* [Raspberry Pi 2 Model B][3] - -[4]: https://www.sparkfun.com/products/13153 - -* [IC Test Clip - SOIC 8-Pin][4] - -[5]: https://duckduckgo.com/?q=lenovo+whitelist+bios - -[6]: https://en.wikipedia.org/wiki/Intel_Active_Management_Technology - -* [Intel Active Management Technology Wikipedia Post][6] - -[7]: https://www.coreboot.org/downloads.html - -[8]: https://www.coreboot.org/Build_HOWTO - -* [Coreboot Build HOWTO][8] - -[9]: https://www.coreboot.org/downloads.html - -* [Coreboot Downloads][9] - -[11]: http://www.myfixguide.com/manual/lenovo-thinkpad-x230-disassembly-clean-cooling-fan-remove-keyboard/ - -[12]: https://www.ifixit.com/Device/Lenovo_Thinkpad_x230 - -[13]: https://www.flashrom.org/Flashrom - -* [Flashrom Project Homepage][13] - -[14]: http://www.raspberrypi-spy.co.uk/wp-content/uploads/2014/07/Raspberry-Pi-GPIO-Layout-Worksheet.pdf - -* [Raspberry Pi GPIO Header Sheet B/B+][14] - -[15]: http://www.alldatasheet.com - -[16]: http://html.alldatasheet.com/html-pdf/575458/MCNIX/MX25L3208EM2I12G/1149/7/MX25L3208EM2I12G.html - -[20]: https://eBay.com - -[21]: https://www.amazon.com/Elegoo-120pcs-Multicolored-Breadboard-arduino/dp/B01EV70C78 - -[22]: irc://irc.freenode.net/coreboot - -[23]: https://freenode.net/ - -[24]: https://www.coreboot.org/Mailinglist - -[25]: https://www.flashrom.org/ISP - -[26]: https://www.ericholzbach.net/blog/x230_coreboot/ - -* [Unix Blather: Coreboot on the Lenovo x230][26] - -[27]: https://www.amazon.com/Professional-Non-Abrasive-Spudgers-Anti-Static-Tweezers/dp/B00PHNMEMC - -[28]: https://en.wikipedia.org/wiki/BIOS - -[29]: https://www.seabios.org/SeaBIOS - -[30]: http://www.tianocore.org/ - -[31]: http://www.memtest.org/ diff --git a/blag/content/blog/distributed_systems.markdown b/blag/content/blog/distributed_systems.markdown deleted file mode 100644 index 27705ea..0000000 --- a/blag/content/blog/distributed_systems.markdown +++ /dev/null @@ -1,188 +0,0 @@ ---- -title: "Yet Another Page on Readings in Distributed Systems" -description: "My own list of links, articles, papers, etc. I enjoyed reading -about distributed systems" -tags: - - "Distributed Systems" - - "Readings" -date: "2015-05-08" -updated: "2015-05-12" -categories: - - "Distributed Systems" -slug: "readings-in-distributed-systems" ---- - -> "Distributed systems are hard." --Everyone. - -This page is dedicated to general discussion of distributed systems, references -to general overviews and the like. Distributed systems are difficult and even -the well established ones aren't [bulletproof][1]. How can we make this better? -As SysAdmins? As Developers? First we can attempt to understand some of the -issues related to designing and implementing distributed systems. Then we can -throw all that out and figure out what *really* happens to distributed systems. - -## Recommended Reading ## - -### General ### - -* [Fallacies of Distributed Computing][2] - -* [CAP Theorem][3] - - - [LYSEFGG: Distribunomicon: My other cap is a theorem][4] - - - For a more entertaining introduction to CAP, Hebert's ''Learn You Some - Erlang for Great Good'' has a really good subsection on the topic that - includes the zombie apocalypse and some introduction to how a blend - between AP and CP systems can be achieved. 
- - - [CAP Theorem Proof][5] - - - [You can't sacrifice partition tolerance][6] - -* [Consistency Model][7] - - - [List of Consistency Models][8] - - - [Linearizability][9] - - - [Linearizability versus Serializability][10] - - - [Eventual Consistency][11] - -* [Paxos][12] - - - [Understanding Paxos (Part 1)][13] - - - [Lessons learned from implementing Paxos (2013)][34] - -* [Vector Clock][14] - -* [Split-Brain][15] - -* [Network Partitions][16] - -* [Distributed Systems and the End of the API][17] - -* [The Log][18]: What every software engineer should know about real time - data's unifying abstraction - -The [Jepsen][19] "Call me maybe" articles are really good, well written essays -on topics and technologies related to distributed systems. - -Introductory post to the "Call me maybe" series: - -* [Call me maybe][20] - -Here are some personal recommendations: - -* [The Network is Reliable][1] - -* [Strong Consistency Models][21] - -* [Asynchronous Replication with Failover][22] - -Really anything from Ferd Herbert is good. Particularly, the first and last -chapters of [Erlang In Anger][30] which includes longer essays from his blog -posts. - -* [Queues Don't Fix Overload][31] - -* [It's About the Guarantees][32] - -* [Lessons Learned while Working on Large-Scale Server Software][33] - -### General Networking ### - -* [TCP incast][29] - -### Hadoop ecosystem ### - -This link is more specific to HDFS and is a rather limited experiment but -nonetheless a good read to further understand partition issues that can arise -in Hadoop systems: - -* [Partition Tolerance in HDFS][23] - -More links from the [Jepsen essays][19]: - -* [Call me maybe: Zookeeper][24] - -* [Call me maybe: Kafka][25] - -* [Call me maybe: Cassandra][26] - -### Databases ### - -- [Wikipedia ACID][27] - -- [Call me maybe: Postgres][28] - -[1]: http://aphyr.com/posts/288-the-network-is-reliable - -[2]: http://en.wikipedia.org/wiki/Fallacies_of_Distributed_Computing - -[3]: http://en.wikipedia.org/wiki/CAP_theorem - -[4]: http://learnyousomeerlang.com/distribunomicon#my-other-cap-is-a-theorem - -[5]: http://lpd.epfl.ch/sgilbert/pubs/BrewersConjecture-SigAct.pdf - -[6]: http://codahale.com/you-cant-sacrifice-partition-tolerance/ - -[7]: http://en.wikipedia.org/wiki/Consistency_model - -[8]: http://en.wikipedia.org/wiki/Category:Consistency_models - -[9]: http://en.wikipedia.org/wiki/Linearizability - -[10]: http://www.bailis.org/blog/linearizability-versus-serializability/ - -[11]: http://en.wikipedia.org/wiki/Eventual_consistency - -[12]: http://en.wikipedia.org/wiki/Paxos_(computer_science) - -[13]: http://distributedthoughts.wordpress.com/2013/09/22/understanding-paxos-part-1/ - -[14]: http://en.wikipedia.org/wiki/Vector_clock - -[15]: http://en.wikipedia.org/wiki/Split-brain_(computing) - -[16]: http://en.wikipedia.org/wiki/Network_partitioning - -[17]: https://speakerdeck.com/cemerick/distributed-systems-and-the-end-of-the-api - -[18]: http://engineering.linkedin.com/distributed-systems/log-what-every-software-engineer-should-know-about-real-time-datas-unifying - -[19]: http://aphyr.com/tags/jepsen - -[20]: http://aphyr.com/posts/281-call-me-maybe - -[21]: http://aphyr.com/posts/313-strong-consistency-models - -[22]: http://aphyr.com/posts/287-asynchronous-replication-with-failover - -[23]: https://www.growse.com/2014/07/18/partition-tolerance-and-hadoop-part-1-hdfs/ - -[24]: http://aphyr.com/posts/291-call-me-maybe-zookeeper - -[25]: http://aphyr.com/posts/293-call-me-maybe-kafka - -[26]: 
http://aphyr.com/posts/294-call-me-maybe-cassandra - -[27]: http://en.wikipedia.org/wiki/ACID - -[28]: http://aphyr.com/posts/282-call-me-maybe-postgres - -[29]: http://www.snookles.com/slf-blog/2012/01/05/tcp-incast-what-is-it/ - -[30]: http://www.erlang-in-anger.com/ - -[31]: http://ferd.ca/queues-don-t-fix-overload.html - -[32]: http://ferd.ca/it-s-about-the-guarantees.html - -[33]: http://ferd.ca/lessons-learned-while-working-on-large-scale-server-software.html - -[34]: http://blog.willportnoy.com/2012/06/lessons-learned-from-paxos.html diff --git a/blag/content/blog/elixir_hot_swap_code.markdown b/blag/content/blog/elixir_hot_swap_code.markdown deleted file mode 100644 index 7bbab06..0000000 --- a/blag/content/blog/elixir_hot_swap_code.markdown +++ /dev/null @@ -1,598 +0,0 @@ ---- -title: "Elixir/Erlang Hot Swapping Code" -description: "Hot code reloading with Elixir and Erlang" -tags: - - "Erlang/OTP" - - "Elixir" - - "Hot Swapping Code" - - "How-to" - - "distillery" -date: "2016-12-07" -categories: - - "Development" -slug: "elixir-hot-swapping" ---- - -{{}} - -> Warning, there be black magic here. - -One of the untold benefits of having a runtime is the ability for that runtime -to enable loading and unloading code while the runtime is active. Since the -runtime is itself, essentially, a virtual machine with its own operating system -and process scheduling, it has the ability to start and stop, load and unload -processes and code similar to how "real" operating systems do. - -This enables some spectacular power in terms of creating deployments and -rolling out those deployments. That is, if we can provide a particular artifact -for the runtime to load and replace the running system with, we can instruct it -to upgrade our system(s) _without_ restarting them, without interrupting our -services or affecting users of those systems. Furthermore, if we constrain the -system and make a few particular assumptions, this can all happen nearly -instantaneously. For example, Erlang releases happen in seconds because of the -functional approach taken by the language, this compared to other systems like -[Docker][13] and/or [Kubernetes][14] which may take several minutes or hours -to transition a version because there is no safe assumptions to make about -running code. - -This post will be a small tour through how Elixir and Erlang can perform code -hot swapping, and how this can be useful for deployments. - -## Hot Code Swapping: Basics ## - -There are several functions defined in the [`:sys`][5] and [`:code`][6] modules -that are required for this first example. Namely, the following functions: - -* `:code.load_file/1` - -* `:sys.suspend/1` - -* `:sys.change_code/4` - -* `:sys.resume/1` - -The `:sys.suspend/1` function takes a single parameter, the Process ID (PID) of -the process to suspend, similarly, `:sys.resume` also takes a PID of the -process to resume. The `:code.load_file/1` function, unfortunately named, takes -a single parameter: the _module_ to load into memory. Finally, the -`:sys.change_code` function takes four parameters: `name`, `module`, -`old_version`, and `extra`. The `name` is the PID or the registered atom of the -process. The `extra` argument is a reserved parameter for each process, it's -the same `extra` that will be passed to the restarted process's `code_change/3` -function. 
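
For reference, the `code_change/3` callback that `:sys.change_code/4` ultimately
invokes has a small contract: it receives the old version, the current state,
and `extra`, and returns the (possibly transformed) state. A minimal sketch; the
pass-through transformation here is only illustrative:

```elixir
def code_change(_old_vsn, state, _extra) do
  # reshape the old state here if the new code expects a different structure;
  # this sketch keeps the state as-is
  {:ok, state}
end
```
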
- -### Example ### - -Let's assume we have a particularly simple module, say `KV`, similar to the -following: - -```elixir -defmodule KV do - use GenServer - - @vsn 0 - - def start_link() do - GenServer.start_link(__MODULE__, [], name: __MODULE__) - end - - def init(_) do - {:ok, %{}} - end - - def get(key, default \\ nil) do - GenServer.call(__MODULE__, {:get, key, default}) - end - - def put(key, value) do - GenServer.call(__MODULE__, {:put, key, value}) - end - - def handle_call({:get, key, default}, _caller, state) do - {:reply, Map.get(state, key, default), state} - end - - def handle_call({:put, key, value}, _caller, state) do - {:reply, :ok, Map.put(state, key, value)} - end - -end -``` - -Save this into a file, say, `kv.ex`. Next we will compile it and load it into -an `iex` session: - -``` -% elixirc kv.ex -% iex -iex> l KV -{:module, KV} -``` - -We can start the process and try it out: - -``` -iex> KV.start_link -{:ok, #PID<0.84.0>} -iex> KV.get(:a) -nil -iex> KV.put(:a, 42) -:ok -iex> KV.get(:a) -42 -``` - -Now, let's say we wish to add some logging to the handling of the `:get` and -`:put` messages. We will apply a patch similar to the following: - -``` ---- a/kv.ex -+++ b/kv.ex -@@ -1,7 +1,8 @@ - defmodule KV do -+ require Logger - use GenServer - -- @vsn 0 -+ @vsn 1 - - def start_link() do - GenServer.start_link(__MODULE__, [], name: __MODULE__) -@@ -20,10 +21,12 @@ defmodule KV do - end - - def handle_call({:get, key, default}, _caller, state) do -+ Logger.info("#{__MODULE__}: Handling get request for #{key}") - {:reply, Map.get(state, key, default), state} - end - - def handle_call({:put, key, value}, _caller, state) do -+ Logger.info("#{__MODULE__}: Handling put request for #{key}:#{value}") - {:reply, :ok, Map.put(state, key, value)} - end -``` - -Without closing the current `iex` session, apply the patch to the file and -compile the module: - -``` -% patch kv.ex kv.ex.patch -% elixirc kv.ex -``` - -> You may see a warning about redefining an existing module, this warning can -> be safely ignored. - -Now, in the still open `iex` session, let's begin the black magic incantations: - -``` -iex> :code.load_file KV -{:module, KV} -iex> :sys.suspend(KV) -:ok -iex> :sys.change_code(KV, KV, 0, nil) -:ok -iex> :sys.resume(KV) -:ok -``` - -Now, we should be able to test it again: - -``` -iex> KV.get(:a) -21:28:47.989 [info] Elixir.KV: Handling get request for a -42 -iex> KV.put(:b, 2) -21:28:53.729 [info] Elixir.KV: Handling put request for b:2 -:ok -``` - -Thus, we are able to hot-swap running code, without stopping, losing state, or -effecting processes waiting for that data! - -But the above is merely an example of manually invoking the code reloading API, -there are better ways to achieve the same result. - -### Example: `iex` ### - -There are several functions available to us when using `iex` that essentially -perform the above actions for us: - -* `c/1`: compile file - -* `r/1`: (recompile and) reload module - -The `r/1` helper takes an atom of the module to reload, `c/1` takes a binary of -the path to the module to compile. Check the [documentation][15] for more -information. - -Therefore, using these, we can simplify what we did in the previous example to -simply a call to `r/1`: - -``` -iex> r KV -warning: redefining module KV (current version loaded from Elixir.KV.beam) - kv.ex:1 - -{:reloaded, KV, [KV]} -iex> KV.get(:a) - -21:52:47.829 [info] Elixir.KV: Handling get request for a -42 -``` - -In one function, we have done what previously took four functions. 
However, the -story does not end here. This was only for a single module, one `GenServer`. -What about when we want to upgrade more modules, or an entire application? - -> Although `c/1` and `r/1` are great for development. They are *not* -> recommended for production use. Do not depend on them to perform deployments. - -## Relups ## - -Fortunately, there is another set of tooling that allows us to more easily -deploy releases, and more pointedly, perform upgrades: Relups. Before we dive -straight into relups, let's discuss a few other related concepts. - -### Erlang Applications ### - -As part of Erlang "Applications", there is a related file, the [`.app`][16] -file. This resource file describes the application: other applications that -should be started and other metadata about the application. Using Elixir, this -file can be found in the `_build/{Mix.env}/lib/{app_name}/ebin/` folder. - -Here's an example `.app` file from the [octochat][17] demo application: - -``` -± cat _build/dev/lib/octochat/ebin/octochat.app -{application,octochat, - [{registered,[]}, - {description,"Demo Application for How Swapping Code"}, - {vsn,"0.3.3"}, - {modules,['Elixir.Octochat','Elixir.Octochat.Acceptor', - 'Elixir.Octochat.Application','Elixir.Octochat.Echo', - 'Elixir.Octochat.ServerSupervisor', - 'Elixir.Octochat.Supervisor']}, - {applications,[kernel,stdlib,elixir,logger]}, - {mod,{'Elixir.Octochat.Application',[]}}]}. -``` - -This is a pretty good sized triple (3-tuple). By the first element of the -triple, we can tell it is an `application`, the application's name is -`octochat` given by the second element, and everything in the list that follows -is a keyword list that describes more about the `octochat` application. -Notably, we have the usual metadata found in the `mix.exs` file, the `modules` -that make up the application, and the other OTP applications this application -requires to run. - -### Erlang Releases ### - -An Erlang "release", similar to Erlang application, is an entire system: the -Erlang VM, the dependent set of applications, and arguments for the Erlang VM. - -After building a release for the Octochat application with the -[`distillery`][4] project, we get a `.rel` file similar to the following: - -``` -± cat rel/octochat/releases/0.3.3/octochat.rel -{release,{"octochat","0.3.3"}, - {erts,"8.1"}, - [{logger,"1.3.4"}, - {compiler,"7.0.2"}, - {elixir,"1.3.4"}, - {stdlib,"3.1"}, - {kernel,"5.1"}, - {octochat,"0.3.3"}, - {iex,"1.3.4"}, - {sasl,"3.0.1"}]}. -``` - -This is an Erlang 4-tuple; it's a `release` of the `"0.0.3"` version of -`octochat`. It will use the `"8.1"` version of "erts" and it depends on the -list of applications (and their versions) provided in the last element of the -tuple. - -### Appups and Relups ### - -As the naming might suggest, "appups" and "relups" are the "upgrade" versions -of applications and releases, respectively. Appups describe how to take a -single application and upgrade its modules, specifically, it will have -instructions for upgrading modules that require "extras". or, if we are -upgrading supervisors, for example, the Appup will have the correct -instructions for adding and removing child processes. - -Before we examine some examples of these files, let's first look at the type -specification for each. - -Here is the syntax structure for the `appup` resource file: - -``` -{Vsn, - [{UpFromVsn, Instructions}, ...], - [{DownToVsn, Instructions}, ...]}. 
-``` - -The first element of the triple is the version we are either upgrading to or -downgrading from. The second element is a keyword list of upgrade instructions -keyed by the version the application would be coming _from_. Similarly, the -third element is a keyword list of downgrade instructions keyed by the version -the application will downgrade _to_. For more information about the types -themselves, see the [SASL documentation][18]. - -Now that we have seen the syntax, let's look at an example of the appup -resource file for the octochat application generated using [distillery][4]: - -``` -± cat rel/octochat/lib/octochat-0.2.1/ebin/octochat.appup -{"0.2.1", - [{"0.2.0",[{load_module,'Elixir.Octochat.Echo',[]}]}], - [{"0.2.0",[{load_module,'Elixir.Octochat.Echo',[]}]}]}. -``` - -Comparing this to the syntax structure above, we see that we have a `Vsn` -element of `"0.2.1"`, we have a `{UpFromVsn, Instructions}` pair: -`[{"0.2.0",[{load_module,'Elixir.Octochat.Echo',[]}]}]`, and we have a single -`{DownToVsn, Instructions}` pair: -`[{"0.2.0",[{load_module,'Elixir.Octochat.Echo',[]}]}]`. - -The instructions themselves tell us what exactly is required to go from one -version to the another. Specifically, in this example, to upgrade, we need to -"load" the `Octochat.Echo` module into the VM. Similarly, the instructions to -downgrade are the same. For a [semantically versioned][21] project, this is an -understandably small change. - -It's worth noting the instructions found in the `.appup` files are usually -high-level instructions, thus, `load_module` covers both the loading of object -code into memory and the suspend, replace, resume process of upgrading -applications. - -Next, let's look at the syntax structure of a `relup` resource file: - -``` -{Vsn, - [{UpFromVsn, Descr, Instructions}, ...], - [{DownToVsn, Descr, Instructions}, ...]}. -``` - -This should look familiar. It's essentially the exact same as the `.appup` -file. However, there's an extra term, `Descr`. The `Descr` field can be used as -part of the version identification, but is optional. Otherwise, the syntax of -this file is the same as the `.appup`. - -Now, let's look at an example `relup` file for the same release of octochat: - -``` -± cat rel/octochat/releases/0.2.1/relup -{"0.2.1", - [{"0.2.0",[], - [{load_object_code,{octochat,"0.2.1",['Elixir.Octochat.Echo']}}, - point_of_no_return, - {load,{'Elixir.Octochat.Echo',brutal_purge,brutal_purge}}]}], - [{"0.2.0",[], - [{load_object_code,{octochat,"0.2.0",['Elixir.Octochat.Echo']}}, - point_of_no_return, - {load,{'Elixir.Octochat.Echo',brutal_purge,brutal_purge}}]}]}. -``` - -This file is a little more dense, but still adheres to the basic triple syntax -we just examined. Let's take a closer look at the upgrade instructions: - -``` -[{load_object_code,{octochat,"0.2.1",['Elixir.Octochat.Echo']}}, - point_of_no_return, - {load,{'Elixir.Octochat.Echo',brutal_purge,brutal_purge}}] -``` - -The first instruction, -`{load_object_code,{octochat,"0.2.1",['Elixir.Octochat.Echo']}}`, tells the -[release handler][22] to load into memory the new version of the -"Octochat.Echo" module, specifically the one associated with version "0.2.1". -But this instruction will not instruct the release handler to (re)start or -replace the existing module yet. Next, `point_of_no_return`, tells the release -handler that failure beyond this point is fatal, if the upgrade fails after -this point, the system is restarted from the old release version ([appup -documentation][18]). 
The final instruction, -`{load,{'Elixir.Octochat.Echo',brutal_purge,brutal_purge}}`, tells the release -handler to replace the running version of the module and use the newly loaded -version. - -For more information regarding `burtal_purge`, check out the "PrePurge" and -"PostPurge" values in the [appup documentation][18]. - -Similar to the `.appup` file, the third element in the triple describes to the -release handler how to downgrade the release as well. The version numbers in -this case make this a bit more obvious as well, however, the steps are -essentially the same. - -### Generating Releases and Upgrades with Elixir ### - -Now that we have some basic understanding of releases and upgrades, let's see -how we can generate them with Elixir. We will generate the releases with the -[distillery][4] project, however, the commands should also work with the soon -to be deprecated [exrm][2] project. - -> This has been written for the `0.10.1` version of [distillery][4]. This is a -> fast moving project that is in beta, be prepared to update as necessary. - -Add the [distillery][4] application to your `deps` list: - - {:distillery, "~> 0.10"} - -Perform the requisite dependency download: - -``` -± mix deps.get -``` - -Then, to build your first production release, you can use the following: - -``` -± MIX_ENV=prod mix release --env prod -``` - -> For more information on why you must specify both environments, please read -> the [FAQ][19] of distillery. If the environments match, there's a small -> modification to the `./rel/config.exs` that can be made so that specifying -> both is no longer necessary. - -After this process is complete, there should be a new folder under the `./rel` -folder that contains the new release of the project. Within this directory, -there will be several directories, namely, `bin`, `erts-{version}`, `lib`, and -`releases`. The `bin` directory will contain the top level Erlang entry -scripts, the `erts-{version}` folder will contain the requisite files for the -Erlang runtime, the `lib` folder will contain the compiled beam files for the -required applications for the release, and finally, the `releases` folder will -contain the versions of the releases. Each folder for each version will have -its own `rel` file, generated boot scripts, as per the [OTP releases -guide][20], and a tarball of the release for deployment. - -Deploying the release is a little out of scope for this post and may be the -subject of another. For more information about releases, see the [System -Principles][23] guide. However, for Elixir, it may look similar to the -following: - -* Copy the release tarball to the target system: - - ``` - ± scp rel/octochat/releases/0.3.2/octochat.tar.gz target_system:/opt/apps/. - ``` - -* On the target system, unpack the release: - - ``` - ± ssh target_system - (ts)# cd /opt/apps - (ts)# mkdir -p octochat - (ts)# tar -zxf octochat.tar.gz -C octochat - ``` - -* Start the system: - - ``` - (ts)# cd octochat - (ts)# bin/octochat start - ``` - -This will bring up the Erlang VM and the application tree on the target system. - -Next, after making some applications changes and bumping the project version, -we can generate an upgrade release using the following command: - -``` -± MIX_ENV=prod mix release --upgrade -``` - -> Note, This will _also_ generate a regular release. 
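
(The "bumping the project version" step mentioned above is just editing
`version` in `mix.exs`; a sketch with illustrative values, matching the 0.3.2 to
0.3.3 upgrade used below:)

```elixir
def project do
  [app: :octochat,
   version: "0.3.3",  # previously "0.3.2"
   elixir: "~> 1.3",
   deps: deps()]
end
```
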
- -Once this process finishes, checking the `rel/{app_name}/releases` folder, -there should be a new folder for the new version, and a `relup` file for the -upgrade: - -``` -± cat rel/octochat/releases/0.3.3/octochat.rel -{release,{"octochat","0.3.3"}, - {erts,"8.1"}, - [{logger,"1.3.4"}, - {compiler,"7.0.2"}, - {elixir,"1.3.4"}, - {stdlib,"3.1"}, - {kernel,"5.1"}, - {octochat,"0.3.3"}, - {iex,"1.3.4"}, - {sasl,"3.0.1"}]}. - -± cat rel/octochat/releases/0.3.3/relup -{"0.3.3", - [{"0.3.2",[], - [{load_object_code,{octochat,"0.3.3",['Elixir.Octochat.Echo']}}, - point_of_no_return, - {suspend,['Elixir.Octochat.Echo']}, - {load,{'Elixir.Octochat.Echo',brutal_purge,brutal_purge}}, - {code_change,up,[{'Elixir.Octochat.Echo',[]}]}, - {resume,['Elixir.Octochat.Echo']}]}], - [{"0.3.2",[], - [{load_object_code,{octochat,"0.3.1",['Elixir.Octochat.Echo']}}, - point_of_no_return, - {suspend,['Elixir.Octochat.Echo']}, - {code_change,down,[{'Elixir.Octochat.Echo',[]}]}, - {load,{'Elixir.Octochat.Echo',brutal_purge,brutal_purge}}, - {resume,['Elixir.Octochat.Echo']}]}]}. -``` - -Similarly, to deploy this new upgrade, copy the tarball to the target system -and unpack it into the same directory as before. - -After it's unpacked, upgrading the release can be done via a stop and start, or -we can issue the `upgrade` command: - - (ts)# bin/octochat stop - (ts)# bin/octochat start - -Or: - - (ts)# bin/octochat upgrade "0.3.3" - -When starting and stopping, the entry point script knows how to select the -"newest" version. - -When upgrading, it is required to specify the desired version, this is -necessary since the upgrade process may require more than simply jumping to the -"latest" version. - -## Summary ## - -Release management is a complex topic, upgrading without restarting seemingly -even more so. However, the process _can_ be understood, and knowing how the -process works will allow us to make more informed decisions regarding when to -use it. - -The tooling for performing hot upgrades has been around for a while, and while -the tooling for Elixir is getting closer, we are not quite ready for prime -time. But it won't remain this way for long. Soon, it will be common place for -Elixir applications to be just as manageable as the Erlang counterparts. 
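
Putting the target-side steps together, deploying the 0.3.3 upgrade might look
roughly like the following sketch, reusing the same layout as the initial
deployment above:

```
± scp rel/octochat/releases/0.3.3/octochat.tar.gz target_system:/opt/apps/.
± ssh target_system
(ts)# cd /opt/apps
(ts)# tar -zxf octochat.tar.gz -C octochat
(ts)# cd octochat
(ts)# bin/octochat upgrade "0.3.3"
```
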
- -[1]: http://erlang.org/doc/reference_manual/code_loading.html - -[2]: https://github.com/bitwalker/exrm - -[3]: https://github.com/erlware/relx - -[4]: https://github.com/bitwalker/distillery - -[5]: http://erlang.org/doc/man/sys.html - -[6]: http://erlang.org/doc/man/code.html - -[7]: http://elixir-lang.org/docs/stable/elixir/ - -[8]: http://elixir-lang.org/docs/stable/elixir/Code.html - -[9]: http://erlang.org/doc/man/relup.html - -[10]: http://andrealeopardi.com/posts/handling-tcp-connections-in-elixir/ - -[11]: https://git.devnulllabs.io/demos/octochat.git - -[12]: https://www.youtube.com/watch?v=xrIjfIjssLE - -[13]: https://docker.com - -[14]: http://kubernetes.io/ - -[15]: http://elixir-lang.org/docs/stable/iex/IEx.Helpers.html - -[16]: http://erlang.org/doc/man/app.html - -[17]: https://git.devnulllabs.io/demos/octochat.git - -[18]: http://erlang.org/doc/man/appup.html - -[19]: https://hexdocs.pm/distillery/common-issues.html#why-do-i-have-to-set-both-mix_env-and-env - -[20]: http://erlang.org/doc/design_principles/release_structure.html - -[21]: http://semver.org - -[22]: http://erlang.org/doc/man/release_handler.html - -[23]: http://erlang.org/doc/system_principles/create_target.html diff --git a/blag/content/blog/elixir_otp_releases.markdown b/blag/content/blog/elixir_otp_releases.markdown deleted file mode 100644 index 6936a9b..0000000 --- a/blag/content/blog/elixir_otp_releases.markdown +++ /dev/null @@ -1,324 +0,0 @@ ---- -title: "Releasing Elixir/OTP applications to the World" -description: "The perils of releasing OTP applications in the wild" -tags: - - "Erlang/OTP" - - "Elixir" - - "Phoenix" - - "Docker" - - "How-to" - - "Tips and Tricks" -date: "2016-05-27" -categories: - - "Development" -slug: "elixir-otp-releases" ---- - -Developing Elixir/OTP applications is an enlightening, mind-boggling, and -ultimately enjoyable experience. There are so many features of the language -that change the very way we as developers think about concurrency and program -structure. From writing pure functional code, to using message passing to -coordinate complex systems, it is one of the best languages for the SMP -revolution that has been slowly boiling under our feet. - -However, _releasing_ Elixir and OTP applications is an entirely different and -seemingly seldom discussed topic. - -The distribution tool chain of Erlang and OTP is a complicated one, There's -[`systools`][9], [`reltool`][10], [`rebar(?3)`][11], and [`relx`][12] just to -name a few that all ultimately help in creating an Erlang/OTP ["release"][13]. -Similar to `rebar3`, [`exrm`][7] takes the high-level abstraction approach to -combining `reltool` and `relx` into a single tool chain for creating releases of -Elixir projects. Of course, we can also borrow from the collection of -[autotools][14]. - -There are plenty of articles and posts discussing how and why to use -`exrm`. I feel many of them, however, fail to _truly_ discuss _how_ to do this -effectively. Most will mention the surface of the issue, but never give the -issue any real attention. As any developer that wants to eventually _ship_ -code, this is entirely too frustrating to leave alone. - -There are "ways" of deploying OTP code relatively simply, however, these -methods generally avoid good practice of continuous integration/continuous -deployment, e.g., "build the OTP application _on_ the target system" or simply -use `mix run`, etc. 
- -I cannot speak for everyone, but my general goal is to _not_ have such a -manual step in my release pipeline, let alone having a possibly full autotool -chain and Erlang/Elixir stack on the production system is slightly unnerving -for it's own set of reasons. - -## Problem ## - -Here are some selected quotes; I'm not trying to pick on anyone in particular -or the community at large, but I'm trying to show a representation of why this -very topic is an issue in the first place. - -> We need to be sure that the architectures for both our build and hosting -> environments are the same, e.g. 64-bit Linux -> 64-bit Linux. If the -> architectures don't match, our application might not run when deployed. Using -> a virtual machine that mirrors our hosting environment as our build -> environment is an easy way to avoid that problem. -[Phoenix Exrm Releases][8]. - -And another, similar quote: - -> One important thing to note, however: *you must use the same architecture for -> building your release that the release is getting deployed to.* If your -> development machine is OS X and you’re deploying to a Linux server, you need -> a Linux machine to build your exrm release or it isn't going to work, or you -> can just build on the same server you’re going to be running everything on. -[Brandon Richey][1]. - -Unfortunately, these miss a lot of the more subtle issues, dependency hell is -real, and we're about to really dive into it. - -There are a few examples where "same architecture" isn't enough, and this is -where we will spend the majority of our time. - -For these examples, we will assume our host machine is running GNU/Linux, -specifically Arch Linux, and our target machine is running CentOS 7.2. Both -machines are running the `AMD64` instruction sets, the architectures are the -_same_. - -### Shared Objects ### - -Let's start with the most simplistic issue, different versions of shared -objects. - -Arch Linux is a rolling release distribution that is generally _right_ on the -bleeding edge of packages, upstream is usually the development sources -themselves. When `ncurses` moves version 6, Arch isn't far behind in putting it -in the stable package repository (and rebuilding a number of packages that -depend on `ncurses`). CentOS, on the other hand, is not so aggressive. -Therefore, when using the default `relx` configuration with `exrm`, the Erlang -runtime system (ERTS) bundled with the release _will_ be incompatible with the -target system. - -When the OTP application is started, an obscure linking error will be emitted -complaining about how ERTS cannot find a `ncurses.so.6` file and promptly fail. - -Worse, after possibly "fixing" this issue, `ncurses` is only one of a few -shared objects Erlang needs to run, depending on what was enabled when Erlang -was built or what features the deployed application needs. - -### Erlang Libraries ### - -We may try to resolve this issue by adding a particular `rel/relx.config` file -to our Elixir project. Specifically, we will _not_ bundle ERTS, opting to use -the target's ERTS instead. - - {include_erts, false}. - -This seems like a promising approach, until another error message is emitted at -startup, namely, ERTS cannot find `stdlib-2.8` in the `/usr/lib/erlang/lib` -folder. - -Did I mention that our current build system is Arch and our target is CentOS? -Arch may have the _newest_ version of Erlang in the repository and CentOS is -still at whatever it was at before: R16B unless the [Erlang Solutions][1] -release is being used. 
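
A quick way to see the mismatch for yourself is to compare the OTP release and
`stdlib` version on the build and target machines; for example (output and
paths are illustrative and vary by distribution):

    % erl -noshell -eval 'io:format("~s~n", [erlang:system_info(otp_release)]), halt().'
    18
    % ls /usr/lib/erlang/lib | grep '^stdlib'
    stdlib-2.8
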
- -Since Erlang applications do (patch number) version locking, applications in -the dependency tree will need to match exactly and it's guaranteed that any and -all OTP applications will be at least depending on the Erlang kernel and the -Erlang standard library, these are at least two OTP applications _our_ -application is going to need that are *no longer packaged when `relx` doesn't -bundle ERTS*. - -Even if we specify another option to `relx`, namely, `{system_libs, true}.`, we -are left with the same lack of Erlang system libraries. - -That's correct and there is some sensible reasons for this. If we ask `exrm` -and therefore `relx` to not include the build system's ERTS, we are _also_ -excluding the standard Erlang libraries from the release as well, asking to -include the standard libraries of the build system's ERTS could run into the -_very_ same issues as above for a whole host of other reasons. - -We are left to attempt more solutions. - -### Docker or Virtualization ### - -Next, since we do want to ultimately get our build running in a CI/CD -environment, we may look toward virutalization/containerization. Being sensible -people, we try to use a small image, maybe basing our image on [Alpine -Linux][2] as to be nice to our precious `/var` or SSD space. We may even go so -far as to build Erlang and Elixir ourselves in these images to make sure we -have the most control over them as we can. Furthermore, since we are building -everything ourself, shipping the built ERTS seems like a good idea too, so we -can delete the `rel/relx.config` file. - -This seems promising. However, we have shared object problems again. Since we -are building Erlang and Elixir ourselves, we decided to disable `termcap` -support thus no longer requiring the `ncurses` library altogether. We hope that -the `openssl` libraries are the same, so we don't have to worry about that -mess, and we move on. - -This time, when we attempt to deploy the application, we get a different, -obscure error: something about our `musl` C library isn't found on the target -system. Right, because we are trying to create a small image, we opted to use -the `musl` C library because of its size and being easily supported in the -Alpine Linux container. Trying to use GNU C library is too cumbersome and would -only inflate the image beyond any gains we would achieve by using Alpine in the -first place. - -That's not going to work. - -### OTP as Project Dependency ### - -Another option we might try is make Erlang a build dependency of our Elixir -application, this _could_ be achieved via the following structure: - - {:otp, - "~> 18.3.2", - github: "erlang/otp", - tag: "OTP-18.3.2", - only: :prod, - compile: "./otp_build autoconf;" <> - "./configure --without-termcap --without-javac;" <> - "make -j4" <> - "DISTDIR=/tmp/erlang make install" - } - -Then using `rel/relx.config` with: - - {include_erts, "/tmp/erlang"}. - -*May* turn out to work, assuming the build server and the target system have -the same shared objects for OpenSSL and others that may be enabled by default. - -> However, I didn't follow this idea all the way to the end as I wasn't -> entirely happy with it, and it would fall to some later issues. - -Notably, though, this will inflate the production builds drastically since our -`mix deps.get` and `mix deps.compile` steps will hang attempting to build -Erlang itself. - -However, again, we will likely run into issues with the C library used by the -build system/container. 
Going this route doesn't allow us to use Alpine Linux -either. - -Worse, there's another issue that hasn't even shown itself but is lying in -wait: native implemented (or interface) functions (NIFs). - -If our project has a dependency that builds a NIF as part of its build -(Elixir's [comeonin][16] is a good example of this), unless the NIF is -statically compiled, we are back to square one and shared objects are not our -friends. Furthermore, if we are using a different standard library -implementation, i.e., `musl` vs `glibc`, the dependency will likely complain -about it as well. - -## Non-Solution Solutions ## - -Of course, all of these above issues can be solved by "just building on the -target machine" or by simply using `mix run` on the target instead. However, I -personally find these solutions unacceptable. - -I'm not overly fond of requiring my target hosts, my production machines, -running a full development tool chain. Before this is dismissed as a personal -issue, remember that our dependency tree may contain NIFs outside of our -control. Therefore, it's not just Erlang/Elixir that are required to be on the -machine, but a C standard library and autotools too. - -This solution doesn't immediately give the impression of scaling architecture. -If a new release needs to be deployed, each server will now need to spare some -load for building the project and its dependencies before any real, actual -upgrading can continue. - -## Solutions(?) ## - -What are we to do? How are we to build Erlang/Elixir/OTP applications as part -of our CI/CD pipeline? Particularly, how are we to build our applications on a -CI/CD system and *not* the production box(es) themselves? - -If any of the above problems tell us anything, it's that the build system must -be either the *exact same* machine or clone with build tools. Thankfully, we -can achieve a "clone" without too much work using [Docker][3] and the [official -image registries][4]. - -By using the official CentOS image and a specific tag, we can match our target -system almost exactly. Furthermore, building the Erlang/Elixir stack from -source is a relatively small order for a Docker container too, making -versioning completely within reach. Moreover, since the build host and the -target host are nearly identical, bundling ERTS should be a non-issue. - -> This is the observed result of using [docker-elixir-centos][5] for a base -> image for CI builds. - -Another possible solution is to ship Docker containers as the artifact of the -build. However, this, to do well, requires a decent Docker capable -infrastructure and deployment process. Furthermore, going this route, it's -unlikely that `exrm` is even necessary at all. It is likely more appropriate to -simply use `mix run` or whatever the project's equivalent is. Another thing -lost here, is [relups][6], which is essentially the whole reason of wanting to -use `exrm` in the first place. - -As such, if using `exrm` is desired, setting up a build server will be -imperative to building reliably and without building on production. Scaling -from a solid build foundation will be much easier than building and "deploying" -on the production farm itself. - -## Moving Forward ## - -Releasing software isn't in a particularly hard class of problems, but it does -have its challenges. Some languages attempt to solve this challenge in its -artifact/build result. Other languages, unfortunately, don't attempt to solve -this problem at all. 
Though, I can see it possible to eventually reach a goal -of being able to create binary releases with steps as simple as `./configure && -make && make install && tar`. - -But we aren't there yet. - -But we are close. - -The current way Erlang/OTP applications want to be deployed includes wanting to -ship _with_ the runtime, this is a great starting point. - -To move to a better, easier release cycle, we need a few things: - -* The ability to (natively) cross-compile to different architectures and - different versions of ERTS _and_ cross-compile Erlang code itself. - -* The ability to easily statically compile ERTS and bundle the result for the - specified architecture. - -Cross-compiling to different versions of ERTS is likely a harder problem to -tackle. But being able to cross-compile the ERTS itself is likely much easier -since this is already a [feature][15] of GCC. - -Thus, our problem is now how do we add and/or expose the facility of -customizing the appropriate build flags to our projects and dependencies to -cross-compile a static ERTS and any NIFs and bundle these into a solid OTP -release. - -[1]: https://erlang-solutions.com - -[2]: http://alpinelinux.org - -[3]: https://docker.com - -[4]: https://hub.docker.com/explore/ - -[5]: https://github.com/kennyballou/docker-elixir-centos - -[6]: http://erlang.org/doc/man/relup.html - -[7]: https://github.com/bitwalker/exrm - -[8]: http://www.phoenixframework.org/docs/advanced-deployment - -[9]: http://erlang.org/doc/man/systools.html - -[10]: http://erlang.org/doc/man/reltool.html - -[11]: https://github.com/erlang/rebar3/releases - -[12]: https://github.com/erlware/relx - -[13]: http://erlang.org/doc/design_principles/release_structure.html - -[14]: https://en.wikipedia.org/wiki/GNU_Build_System - -[15]: https://www.gnu.org/software/automake/manual/html_node/Cross_002dCompilation - -[16]: https://hex.pm/packages/comeonin diff --git a/blag/content/blog/git-in-reverse.markdown b/blag/content/blog/git-in-reverse.markdown deleted file mode 100644 index 5971523..0000000 --- a/blag/content/blog/git-in-reverse.markdown +++ /dev/null @@ -1,684 +0,0 @@ ---- -title: "Learning Git in Reverse" -description: "A backwards introduction to the information manager from hell" -tags: - - "Git" - - "Learning" - - "Talks" - - "zData" -date: "2016-01-18" -pubdate: "2016-01-20" -categories: - - "Development" -slug: "git-in-reverse" ---- - -> The content of this post is drafted from contents of a [similarly titled -> presentation][23]. - -It is certainly counter-intuitive to learn to drive by first going backwards, -so why learn how to use Git in reverse? The short answer is: knowing the -internals of Git _should_ make the commands and workflows of Git more -accessible and understandable. - -We will start by touring the plumbing commands and walk all the way through -branching. - -## What is Git± ## - -{{< figure src="http://imgs.xkcd.com/comics/git.png" caption="If that doesn't fix it, git.txt contains the phone number of a friend of mine who understands git. Just wait through a few minutes of 'It's really pretty simple, just think of branches as...' and eventually you'll learn the commands that will fix everything." alt="XKCD on Git" >}} - -Git is a few things to many people, and creating a standard definition is our -first step to fully understanding the nebulous Git. - -Git, developed by Linus Torvalds, is a distributed version control system -(DVCS). This means, Git is a tool for managing, typically, source code and its -related versioning. 
It accomplishes this by creating a directed acyclic -graph of the code and folder structure and tracking the changes in the graph. - -Git internally accomplishes this by using a key-value or content-addressable -filesystem. Git only knows how to store "objects". There is really no other -_real_ thing that Git is storing. - -## Plumbing ## - -We will start by learning a few of the most basic plumbing commands of Git, -beginning with the [`git-hash-object(1)`][5] command: - -### Git Objects ### - -Git objects are [zlib][3] compressed binary files stored under the -`.git/objects` folder of any Git repository. They are typically created with -the [`git-hash-object(1)`][5] command and are very basic in content: several bytes -of header information used by Git, type and size, and the full contents of the -file Git is storing. - -For the majority of this post, we will be referencing objects created in a -temporary repository: - - $ cd /tmp - $ git init foo - $ cd foo - -> The [`git-init(1)`][4] command creates a new local Git repository in the -> current directory or creates a new directory with a newly initialized Git -> repository. - -After creating a new Git repository, let's examine its current contents: - - ± find .git - .git - .git/objects - .git/objects/info - .git/objects/pack - .git/config - .git/HEAD - .git/hooks - .git/hooks/post-checkout - .git/hooks/post-commit - .git/hooks/ctags - .git/hooks/post-merge - .git/hooks/post-rewrite - .git/refs - .git/refs/tags - .git/refs/heads - -We see that Git has created several folders and files for its internal usage. -We, as developers and users of Git, should generally never need to do anything -to these files, with a small exception for `.git/hooks`. - -As noted before, `.git/objects` will be where Git will store all the objects -(source code and related) we create. `.git/hooks` is used to add custom -operations (white-space, conversions, `ctags`, etc.) to Git's operation. -`.git/refs` is where Git stores information about tags and branches. -`.git/config` is a file for local Git configuration options. This file will -store information about our repository and where it will go for -synchronization. `.git/HEAD` stores a reference to the working copy commit -hash. - -With all this out of the way, we can now start creating objects. - -#### [`git-hash-object(1)`][5] #### - -We can start out by providing some content for [`git-hash-object(1)`][5]: - - ± echo 'foo' | git hash-object --stdin - 257cc5642cb1a054f08cc83f2d943e56fd3ebe99 - -[`git-hash-object(1)`][5] typically expects filenames, so we provide `--stdin` -to tell it we are passing contents from the standard input stream. - -However, since we haven't told Git to store the contents, we have no objects -stored in the `.git/objects` folder. We will need to pass the `-w` flag to -[`git-hash-object(1)`][5] to tell Git to store the contents. - - ± echo 'foo' | git hash-object -w --stdin - 257cc5642cb1a054f08cc83f2d943e56fd3ebe99 - -Now, if we examine the `.git/objects` folder, we will see a new folder and a -new file: - - ± find .git/objects -type f - .git/objects/25/7cc5642cb1a054f08cc83f2d943e56fd3ebe99 - -When Git stores objects, it splits the 40-character hash into two parts: the -first two characters and the last 38. The first two characters, in this case -25, become the folder name, and the last 38, `7cc5642cb1a054f08cc83f2d943e56fd3ebe99`, -become the file name. The purpose of splitting the hash is to make indexing quicker.
-Some of the original motivations for developing Git was a requirement of speed -and performance, can't manage decades of kernel history with a slow tool. - -We can use another Git plumbing command to extract the contents of the object: -`git-cat-file`: - - ± git cat-file -p 257cc5642cb1a054f08cc83f2d943e56fd3ebe99 - foo - -Passing `-p` to `git-cat-file` tells Git to infer the type of the object. -Otherwise, we need to tell Git what the object is. - -Moreover, because we know the objects are zlib compressed, we can use a tool -like `zlib-flate` or similar to peer inside the contents of the files -ourselves: - - ± cat .git/objects/25/7cc5642cb1a054f08cc83f2d943e56fd3ebe99 \ - > | zlib-flate -uncompress - blob 4foo - -Here we see the metadata that Git uses itself, but is otherwise the contents we -expect. - -Perfect. We can store content in Git's object store and we can retrieve the -contents. However, attempting to manage files in this way will be more taxing -than any form of development. Furthermore, we don't have a way to store -filenames yet. Thus, we will need a new type of object, trees. - -### Git Trees ### - -Trees are objects. - -Trees are similarly zlib compressed binaries of the internal data structure of -tracked folder structure of the repository. We create Git trees using the -[`git-update-index(1)`][6] and [`git-write-tree(1)`][7] plumbing commands. - -Since we have an object already added to the Git object store, we can go ahead -and create a basic tree: - - ± git update-index --add --cacheinfo 100644 \ - 257cc5642cb1a054f08cc83f2d943e56fd3ebe99 foo.txt - ± git write-tree - fcf0be4d7e45f0ef9592682ad68e42270b0366b4 - -Thus far, we have created two objects, one to store the contents of `foo.txt` -and another as the tree, which stores binding between the contents and the -filename for `foo.txt`. - -Visually, this may look like something similar to the following image: - -{{< figure src="/media/git-tree-1.png" alt="Git Tree" >}} - -If we inspect the `.git/objects` directory, we should see a new object: - - ± find .git/objects -type f - .git/objects/fc/f0be4d7e45f0ef9592682ad68e42270b0366b4 - .git/objects/25/7cc5642cb1a054f08cc83f2d943e56fd3ebe99 - -As we expected, there is a new folder and new file, `fc` and -`f0be4d7e45f0ef9592682ad68e42270b0366b4`, respectively. - -Since Git trees are actually objects, we can use the `git-cat-file` command -again to print out the contents of the tree: - - ± git cat-file -p fcf0be4d7e45f0ef9592682ad68e42270b0366b4 - 100644 blob 257cc5642cb1a054f08cc83f2d943e56fd3ebe99 foo.txt - -That is, trees are objects where the contents of the object describes a folder -structure "tree". It uses 4 columns for each element of the tree where the -first number uses something similar to the Unix permissions octals; the second -defines the type of object, this can be either `blob` or `tree`; the third is -the hash of the object the entry points to; finally, the last element is the -filename of the object or folder name if the element is a tree. - -A more complicated example of a Git tree may look like the following image: - -{{< figure src="/media/git-tree-2.png" alt="Another Git Tree" >}} - -Now we have file names and the ability to track folders, however, we are still -managing and holding onto the checksums ourselves. Furthermore, we have no -reference to who, when, why, or from where changes are being made. We need -another object to store this information. - -### Git Commits ### - -This will sound familiar: Git commits are ... objects. 
-
-Git stores commits the same way it stores files and trees, as a zlib
-compressed binary in the `.git/objects` folder. Similar to trees, the contents
-of a commit object are specifically formatted, but it is stored the same way
-nonetheless. We can create commits using the [`git-commit-tree(1)`][8]
-plumbing command.
-
-The [`git-commit-tree(1)`][8] command takes a message, a tree, and optionally
-a parent commit, and creates a commit object. If the parent is not specified,
-it creates a root commit.
-
-We have just created a tree, so let's see what committing that tree looks
-like:
-
-    ± echo 'our first commit' \
-    > | git commit-tree fcf0be4d7e45f0ef9592682ad68e42270b0366b4
-    d7ee3cdd8bfcc1b8c3f935302f2d2e78e69e4197
-
-> Notice, the hash returned here _will_ be different; this hash is dependent
-> on the time and the author.
-
-Inspecting our `.git/objects` store, we will see our new object:
-
-    ± find .git/objects -type f
-    .git/objects/d7/ee3cdd8bfcc1b8c3f935302f2d2e78e69e4197
-    .git/objects/fc/f0be4d7e45f0ef9592682ad68e42270b0366b4
-    .git/objects/25/7cc5642cb1a054f08cc83f2d943e56fd3ebe99
-
-Similar to trees and files, we can use the `git-cat-file` command to inspect
-the newly created commit object:
-
-    ± git cat-file -p d7ee3cdd8bfcc1b8c3f935302f2d2e78e69e4197
-    tree fcf0be4d7e45f0ef9592682ad68e42270b0366b4
-    author kballou 1453219069 -0700
-    committer kballou 1453219069 -0700
-
-    our first commit
-
-Breaking down this structure, we have four lines: the first line tells us
-which tree this commit is saving. Since a tree already contains the
-information of all the objects that are currently being tracked, the commit
-only needs to save the root tree to be able to save _all_ the information for
-a commit. The second and third lines tell us the author and committer; often
-these will be the same. They will be different for GitHub pull requests, or in
-other situations where the author of the patch or change is different from the
-maintainer of the project. Finally, after a blank line, the rest of the file
-is reserved for the commit message; since the "our first commit" message is
-short, it only takes a single line.
-
-{{< figure src="/media/git-commit-1.png" alt="Git Commit" >}}
-
-To inform Git that we have created a commit, we need to add some information
-to a few files. First, we need to create the `master` reference. We do this by
-putting the full commit hash into a file called `.git/refs/heads/master`:
-
-    ± echo d7ee3cdd8bfcc1b8c3f935302f2d2e78e69e4197 > .git/refs/heads/master
-
-The next thing we should do is update the `.git/HEAD` file to point to our new
-reference:
-
-    ± echo 'ref: refs/heads/master' > .git/HEAD
-
-This brings Git up to speed on everything we have done manually; this is also
-what Git does for us when we use the porcelain commands for managing code.
-However, it's not really recommended to manually touch these files, and in
-fact, there is another plumbing command for updating them:
-[`git-update-ref(1)`][9]. Instead of the two commands above, we can use a
-single invocation of [`git-update-ref(1)`][9] to perform the above:
-
-    ± git update-ref refs/heads/master d7ee3cdd8bfcc1b8c3f935302f2d2e78e69e4197
-
-Notice, [`git-update-ref(1)`][9] is an idempotent operation, that is, if the
-reference has already been changed to the current hash, running this command
-again will yield no change.
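-
-With the reference in place, we can confirm that `HEAD` now resolves, through
-`master`, to the commit we just created (two more plumbing commands; as
-before, the hash on another machine will differ):
-
-    ± git symbolic-ref HEAD
-    refs/heads/master
-    ± git rev-parse HEAD
-    d7ee3cdd8bfcc1b8c3f935302f2d2e78e69e4197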
-
-Before we get into the porcelain commands, let's walk through the motions
-again:
-
-    ± echo 'bar' > bar.txt
-    ± git hash-object -w bar.txt
-    5716ca5987cbf97d6bb54920bea6adde242d87e6
-    ± git update-index --add --cacheinfo 100644 \
-    > 5716ca5987cbf97d6bb54920bea6adde242d87e6 bar.txt
-    ± git write-tree
-    b98c9a9f9501ddcfcbe02a9de52964ed7dd76d5a
-
-So far, we have added a new file, `bar.txt`, with the contents of `bar`. We
-have added the file to a new tree and we have written the tree to the object
-store. Before we commit the new tree, let's perform a quick inspection of the
-tree:
-
-    ± git cat-file -p b98c9a9f9501ddcfcbe02a9de52964ed7dd76d5a
-    100644 blob 5716ca5987cbf97d6bb54920bea6adde242d87e6 bar.txt
-    100644 blob 257cc5642cb1a054f08cc83f2d943e56fd3ebe99 foo.txt
-
-An entry for `foo.txt` is present in this new tree. Git is implicitly tracking
-previous objects and carrying them forward; we didn't have to do anything for
-Git to do this. Furthermore, the only new objects in the object store so far
-are the new object for the contents of `bar.txt` and the object for the new
-tree:
-
-    ± find .git/objects -type f
-    .git/objects/b9/8c9a9f9501ddcfcbe02a9de52964ed7dd76d5a
-    .git/objects/57/16ca5987cbf97d6bb54920bea6adde242d87e6
-    .git/objects/d7/ee3cdd8bfcc1b8c3f935302f2d2e78e69e4197
-    .git/objects/fc/f0be4d7e45f0ef9592682ad68e42270b0366b4
-    .git/objects/25/7cc5642cb1a054f08cc83f2d943e56fd3ebe99
-
-Now, we can commit this new tree using the [`git-commit-tree(1)`][8] command:
-
-    ± echo 'our second commit' | git commit-tree \
-    > -p d7ee3cdd8bfcc1b8c3f935302f2d2e78e69e4197 \
-    > b98c9a9f9501ddcfcbe02a9de52964ed7dd76d5a
-    b7fd7d75c1375858d8f355735a56228b3eb5e813
-
-Let's inspect this newly minted commit:
-
-    ± git cat-file -p b7fd7d75c1375858d8f355735a56228b3eb5e813
-    tree b98c9a9f9501ddcfcbe02a9de52964ed7dd76d5a
-    parent d7ee3cdd8bfcc1b8c3f935302f2d2e78e69e4197
-    author kballou 1453229013 -0700
-    committer kballou 1453229013 -0700
-
-    our second commit
-
-This commit should look very similar to the previous commit we created.
-However, here we have a line dedicated to the "parent" commit, which should
-line up with the commit passed to the `-p` flag of [`git-commit-tree(1)`][8].
-
-We can update the `master` reference, too, with the new hash:
-
-    ± git update-ref refs/heads/master b7fd7d75c1375858d8f355735a56228b3eb5e813
-
-Let's modify `foo.txt` and create another commit:
-
-    ± echo 'foo 2' > foo.txt
-    ± git hash-object -w foo.txt
-    a3f555b643cbba18c0e69c82d8820c7487cebe15
-    ± git update-index --add --cacheinfo 100644 \
-    > a3f555b643cbba18c0e69c82d8820c7487cebe15 foo.txt
-    ± git write-tree
-    68b757546e08c1d9033c8802e4de1c0d591d90c8
-    ± echo 'our third commit' | git commit-tree \
-    > -p b7fd7d75c1375858d8f355735a56228b3eb5e813 \
-    > 68b757546e08c1d9033c8802e4de1c0d591d90c8
-    354c7435a9959e662cea02495957daa93d875899
-    ± echo 354c7435a9959e662cea02495957daa93d875899 > .git/refs/heads/master
-
-In this final example, we have gone from creating a file, to adding the file
-to a tree, writing the tree, committing the tree, and, finally, pushing
-forward the `master` reference.
-
-There are a few more points to make before we go onto a brief tour of the
-porcelain commands.
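-
-As a quick sanity check (and a peek ahead at the porcelain commands), the
-history we assembled by hand should now replay from the `master` reference;
-again, the hashes on another machine will differ:
-
-    ± git log --oneline
-    354c743 our third commit
-    b7fd7d7 our second commit
-    d7ee3cd our first commit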
- -Let's go ahead and inspect the current state of the object store: - - ± find .git/objects -type f - .git/objects/35/4c7435a9959e662cea02495957daa93d875899 - .git/objects/68/b757546e08c1d9033c8802e4de1c0d591d90c8 - .git/objects/a3/f555b643cbba18c0e69c82d8820c7487cebe15 - .git/objects/b7/fd7d75c1375858d8f355735a56228b3eb5e813 - .git/objects/57/16ca5987cbf97d6bb54920bea6adde242d87e6 - .git/objects/b9/8c9a9f9501ddcfcbe02a9de52964ed7dd76d5a - .git/objects/d7/ee3cdd8bfcc1b8c3f935302f2d2e78e69e4197 - .git/objects/fc/f0be4d7e45f0ef9592682ad68e42270b0366b4 - .git/objects/25/7cc5642cb1a054f08cc83f2d943e56fd3ebe99 - -There's a few things to notice here, every object we have created so far is -_still_ in the object store, the first version of `foo.txt` is still there -(`257cc5642...`). All the trees are still there, and of course the commits are -still around. This is because Git stores objects. It does not store computed -differences or anything of the sort, it simply stores the objects. Other -version control systems may store the patches, individually version files, -or even track file renames. Git does none of this. Git simply stores only the -objects you ask, it doesn't store the differences between any files, it doesn't -track that a file was renamed. Every commit points to the exact version of -_every_ file at that point in time. If a difference between the working file -and the stored version is asked for, it's computed, if a difference between -yesterday's version of a file and today's, it's computed. If a file was -renamed, it can be inferred by a similarity index and computing the difference -between Git trees. This achieves tremendous performance gains because computing -text differences is relatively cheap compared to attempting to manage code -patches as a means of versioning. - -## Porcelain ## - -Now that we have gone through our tour of the plumbing commands and Git -internals, we can start _actually_ use Git. It will be very rare that the -typical user of Git will ever be using any of the plumbing commands above or -touching files under the `.git` folder in their day-to-day work. For the -day-to-day usage of Git, we will be using the "porcelain" commands, the -commands that take the arduous steps above, and turn them into a pleasant walk -in the park. Essentially, everything we have done above can be accomplished -with two (2) commands in Git: [`git-add(1)`][10] and [`git-commit(1)`][11]. - -Let's initialize a new temporary repository for demonstration: - - $ cd /tmp - $ git init bar - $ cd bar - -After initializing the repository, we can add a file, say, `foo.txt`: - - ± echo 'foo' > foo.txt - -Next, we can use the [`git-add(1)`][10] command to stage the file to be -tracked: - - ± git add foo.txt - -Next, we can use the [`git-commit(1)`][11] command to commit the newly created -`foo.txt` file: - - ± git commit -m 'initial commit' - -Everything we have done so far is now achieved with these two commands. We have -stored the contents of the file, created a tree, and committed the tree. - -There are a few more commands that are very useful to using Git on a regular -basis: [`git-clone(1)`][12], [`git-status(1)`][13], [`git-log(1)`][14], -[`git-pull(1)`][15], [`git-push(1)`][16], and [`git-remote(1)`][17]. - -### [`git-clone(1)`][12] ### - -Before you can contribute to a project, you need your own copy of the -repository, this is where we would use [`git-clone(1)`][12]. 
As we have seen -before, we can create _new_ repositories with [`git-init(1)`][4], but we still -need a means of getting existing work from another source. - -Here's an example of using `git-clone`: - - $ git clone git://github.com/git/git.git - ... - -There are several protocols that can be used for the when cloning, listed here -in order of preference: - -* `SSH` - - - Bi-directional data transfer - - - Encrypted - - - Typically authenticated, especially without passwords - -* `Git` - - - Pull only - - - Intelligent transfer mechanism - -* `HTTP/S` - - - Bi-directional data transfer - - - Authenticated - - - Unintelligent transfer mechanism - -* `File` - -### [`git-status(1)`][13] ### - -Often, you will need to know the state of the current repository, and the go-to -command to view the current state of the repository is the -[`git-status(1)`][13] command. It will give you information about the currently -modified files, the currently untracked files, the branch you're one, if the -branch is tracked upstream, it will let you know if you have something to push, -etc. - -### [`git-log(1)`][14] ### - -[`git-log(1)`][14] is used to check the history of the repository. Using -[`git-log(1)`][14] with a few arguments, you can get a pretty concise image of -how your projects are changing. - -Some commonly used options you might use might be: - -* `--stat`: Show the files and number of changes for each commit - -* `--oneline`: Show each commit on a single line - -* `--summary`: Show condensed summary of header information - -### [`git-pull(1)`][15] and [`git-fetch(1)`][18] ### - -[`git-pull(1)`][15] is used to pull remote changes into your current working -copy. I prefer not use [`git-pull(1)`][15] because I find it to be slightly -[harmful][19]. Instead, I use either [`git-fetch(1)`][18] or a form of -[`git-remote(1)`][17]. - -[`git-fetch(1)`][18] is a similar command used for "fetching" remote changes, -but does not attempt to automatically merge them into the local branch. - -### [`git-push(1)`][16] ### - -[`git-push(1)`][16] will send your changes to the remote location. By default, -this command will not attempt to overwrite the remote if the changes cannot be -applied with a "fast-forward" merge operation. - -### [`git-remote(1)`][17] ### - -[`git-remote(1)`][17] is an overall "remote" management command. It allows you -to add remotes, rename remotes, and even fetch information about remotes. -"Remotes" are non-local/upstream sources of changes. The remote "origin" is the -default name for the remote of a clone. This could be a co-worker's repository -or it could be the central repository of the project. - -With the [`git-remote(1)`][17] command, we can add a new remote: - - ± git remote add upstream proto://remote - -We can rename a remote: - - ± git remote rename origin upstream - -And my favorite, we can fetch changes from the remote: - - ± git remote update -p origin - -I use this last command so much, in fact, I have created an alias in my -`~/.gitconfig` file: - - [alias] - up = !git remote update -p - -This way, I can decide when and _how_ I want to merge the upstream work into my -local copy. - -The above commands along with `git-add` and `git-commit` will cover the -majority of Git related tasks, as far as simple, non-branching workflows are -concerned. - -For more advanced usage of Git, we can continue to learn about code branching, -git branches, and merging techniques. - -## Branches ## - -Git branches are actually a very simplistic concept in both implementation and -intuition. 
Code and applications versioned by any version control tool have -their implicit branching points: when one user commits code that another user -isn't yet made aware, the code has diverged from a single path of existence to -multiple paths. This is a form of implicit branching and explicit branching -isn't much different. - -{{< figure src="/media/code-branching.png" alt="Code Branching" >}} - -The structure of Git makes branching trivial, in fact, all that's required is -to create a file that marks the branch point of the code. That is, to create a -file under `.git/refs/heads` that contains the branch's base commit hash. From -there, the code can safely move forward without changing anything of the other -branches. - -{{< figure src="/media/git-branching-1.png" alt="Git Code Branching" >}} - -Branching in Git is accomplished with [`git-branch(1)`][20] and -[`git-checkout(1)`][21]. - -The basic form of [`git-branch(1)`][20] is the following: - - ± git branch {branch_name} [optional branch point] - -If the branch point is not specified, [`git-branch(1)`][20] defaults to the -`HEAD` reference. - -Once the branch is created, you can switch to it using the -[`git-checkout(1)`][21] command: - - ± git checkout {branch_name} - -Moreover, if you're going to be creating a branch and immediately switching to -it, you can use the `-b` flag of [`git-checkout(1)`][21] to do these two steps -in one: - - ± git checkout -b {branch_name} [optional branch point] - -## Merging ## - -Once you're ready to merge your changes from one branch into another branch, -you can use the [`git-merge(1)`][22] command to accomplish that. - -There are a few different ways Git can merge your work between two branches. -The first Git will try is called "fast-forward" merging, where Git will attempt -to play the source branch's commits against the target branch, from the common -history point forward. - -{{< figure src="/media/git-ff-merge-1.png" alt="Git Fast Forward Merge 1" >}} - -{{< figure src="/media/git-ff-merge-2.png" alt="Git Fast Forward Merge 2" >}} - -However, this can only be accomplished if the target branch doesn't have any -changes of its own. - -If the target branch _does_ have changes that are not in the source branch, -Git will attempt to merge the trees and will create a merge commit (assuming -all went well). If a merge conflict arises, the user will need to correct it, -and attempt to re-apply the merge, the resolution of the merge will be in the -merge commit. For more information on merging, see the [`git-merge(1)`][22] -documentation. - -{{< figure src="/media/git-resolve-merge.png" alt="Git parent merge" >}} - -## Summary ## - -Git is not the most complicated version control system out there, and I hope -peering into the internals of Git demonstrates that fact. If anything, it may -seem that Git is very simplistic and unintelligent. But this is actually what -gives Git its power. It's simplistic (recursive) object storage is what gives -Git super powers. Git can infer file renames, branching is trivial, merging is -similarly easier, the storage and tree model are well understood concepts and -the tree and graph algorithms are well studied. - -However, this simplistic approach to storage also has a few problems. Tracking -binary files tends to be expensive because Git isn't storing the difference, -but each version of the file in its entirety. The zlib compression library also -isn't always amazing at compressing binary files either. 
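-
-One way to get a feel for this cost on a real repository is the
-`git count-objects` command, which reports how many objects are in the store
-and how much space they occupy; the numbers below are only illustrative:
-
-    ± git count-objects -vH
-    count: 9
-    size: 36.00 KiB
-    in-pack: 0
-    packs: 0
-    size-pack: 0 bytes
-    prune-packable: 0
-    garbage: 0
-    size-garbage: 0 bytes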
- -Beyond these problems, Git is a very powerful and capable source control tool. - -### References ### - -[1]: http://git-scm.com/ - -* [Git SCM Site][1] - -[2]: http://git-scm.com/book/en/v2 - -* [Apress: Pro Git][2] - -[3]: https://en.wikipedia.org/wiki/Zlib - -* [zlib compression][3] - -[4]: https://www.kernel.org/pub/software/scm/git/docs/git-init.html - -[5]: https://www.kernel.org/pub/software/scm/git/docs/git-hash-object.html - -[6]: https://www.kernel.org/pub/software/scm/git/docs/git-update-index.html - -[7]: https://www.kernel.org/pub/software/scm/git/docs/git-write-tree.html - -[8]: https://www.kernel.org/pub/software/scm/git/docs/git-commit-tree.html - -[9]: https://www.kernel.org/pub/software/scm/git/docs/git-update-ref.html - -[10]: https://www.kernel.org/pub/software/scm/git/docs/git-add.html - -[11]: https://www.kernel.org/pub/software/scm/git/docs/git-commit.html - -[12]: https://www.kernel.org/pub/software/scm/git/docs/git-clone.html - -[13]: https://www.kernel.org/pub/software/scm/git/docs/git-status.html - -[14]: https://www.kernel.org/pub/software/scm/git/docs/git-log.html - -[15]: https://www.kernel.org/pub/software/scm/git/docs/git-pull.html - -[16]: https://www.kernel.org/pub/software/scm/git/docs/git-push.html - -[17]: https://www.kernel.org/pub/software/scm/git/docs/git-remote.html - -[18]: https://www.kernel.org/pub/software/scm/git/docs/git-fetch.html - -[19]: http://stackoverflow.com/questions/15316601/in-what-cases-could-git-pull-be-harmful#15316602 - -* [SO: Cases `git-pull` could be considered harmful][19] - -[20]: https://www.kernel.org/pub/software/scm/git/docs/git-branch.html - -[21]: https://www.kernel.org/pub/software/scm/git/docs/git-checkout.html - -[22]: https://www.kernel.org/pub/software/scm/git/docs/git-merge.html - -[23]: https://kennyballou.com/git-in-reverse.pdf diff --git a/blag/content/blog/git-resurrecting-history.markdown b/blag/content/blog/git-resurrecting-history.markdown deleted file mode 100644 index 9fb0123..0000000 --- a/blag/content/blog/git-resurrecting-history.markdown +++ /dev/null @@ -1,525 +0,0 @@ ---- -title: "Git: Resurrecting History" -description: "" -tags: - - "Git" - - "Tips and Tricks" - - "How-to" -date: "2016-09-14" -categories: - - "Development" -slug: "git-resurrecting-history" ---- - -We all make mistakes. They are inevitable. We must accept that we make them and -move on. But making mistakes in Git seems to be overly complex to resolve and -most will simply result to cloning anew and copying the working tree (or some -subset) and moving on. This, to me, however, seems like a waste of bandwidth as -most issues resulting in broken history are in fact quite easy to resolve, -especially so once the necessary tools are known. - -## Git Reflog ## - -> Reference logs or "reflogs", record when the tips of branches and other -> references were updated in the local repository. ---[`git-reflog(1)`][1] - -That is, the reference log is the (meta)log of the actions against branches -(tips) and other [references][2]. Every time we commit, merge, change branches, -or perform _any_ action that might alter the commit a reference points to, this -change is stored in the reflog of the current repository. For a freshly cloned -repository, the reflog will be quite boring, e.g., a single entry for the -initial clone. - -However, after working on a project for a while, the reflog will have quite the -history of actions performed. 
- -For example, here is the first 24 lines of the reflog for this blog's -repository: - - a1bbd00 HEAD@{0}: checkout: moving from master to git_resurrection - a1bbd00 HEAD@{1}: commit: Update paths of SSL certificate and key - d7fd8f8 HEAD@{2}: commit: Add all targets to phony - f639cbe HEAD@{3}: commit: Add phony target list - 8f3bba4 HEAD@{4}: commit: Add build to deploy dependency - 5331695 HEAD@{5}: merge elixir_releases: Fast-forward - 1a27df5 HEAD@{6}: checkout: moving from elixir_functional_fib to master - 61f755b HEAD@{7}: checkout: moving from master to elixir_functional_fib - 1a27df5 HEAD@{8}: checkout: moving from elixir_releases to master - 5331695 HEAD@{9}: rebase -i (finish): returning to refs/heads/elixir_releases - 5331695 HEAD@{10}: rebase -i (squash): Add Elixir OTP Releases Post - 07f3995 HEAD@{11}: rebase -i (squash): # This is a combination of 4 commits. - 9b7bc7b HEAD@{12}: rebase -i (squash): # This is a combination of 3 commits. - 06414a7 HEAD@{13}: rebase -i (squash): # This is a combination of 2 commits. - cb59962 HEAD@{14}: rebase -i (start): checkout HEAD~5 - bf8836f HEAD@{15}: commit: WIP: elixir otp releases - 34bc98a HEAD@{16}: commit: WIP: update ends - 00fc016 HEAD@{17}: commit: WIP: elixir otp releases - e859353 HEAD@{18}: commit: WIP: elixir otp release post - cb59962 HEAD@{19}: commit: WIP: elixir otp releases post - 1a27df5 HEAD@{20}: checkout: moving from master to elixir_releases - 1a27df5 HEAD@{21}: checkout: moving from elixir_functional_fib to master - 61f755b HEAD@{22}: commit: WIP: some post about fib - 4137e6e HEAD@{23}: checkout: moving from master to elixir_functional_fib - -The first column is the commit SHA-1 that is the _result_ of the action, the -second column provides a shortcut reference that can be used anywhere a regular -reference can be, the 3rd column is the action, e.g., `checkout`, `commit`, -`merge`, etc., and a short description of the action. In the case of commits, -the description text will be the summary line of the commit message. - -From the reflog, we can see I've recently made a branch for this post, before -that, I made several commits against the `master` branch, and before that, I -performed a fast-forward merge of the local `elixir_releases` branch into the -`master` branch. Etc. - -This is some pretty powerful information for digging into the history of the -repository. The reflog is indispensable for working out how to recover lost -changes. - -## Git Fsck ## - -[`git-reflog(1)`][1] is a very useful tool, but, another way history can be -lost is by becoming "unreachable". - -This is where [`git-fsck(1)`][3] can help! [`git-fsck(1)`][3] searches the Git -object store, and will report objects that are dangling or unreachable from a -named reference. This way, we can find commits, or even blobs, that have been -lost to us because they do not exist in the directed acyclic graph (DAG) of -Git, but _do_ exist in the object store itself. - -For example, running `git fsck` on this repository yields the following output: - - ± git fsck - Checking object directories: 100% (256/256), done. - Checking objects: 100% (150/150), done. 
- dangling commit 16f6063abde9dcd8279fb2a7ddd4998aaf44acc7 - -Now, if we add another option, namely, `--unreachable`, we get the following: - - ± git fsck --unreachable - unreachable blob 20c1e21948ab5d9553c11fa8a7230d73055c207e - unreachable commit 16f6063abde9dcd8279fb2a7ddd4998aaf44acc7 - unreachable commit 41a324739bc3f1d265ecc474c58256e3a4ad4982 - unreachable blob c4131dc6d091b1c16943554fa2396f5d405e8537 - -Furthermore, objects listed in the reflog are considered "reachable", but may -be still eluding our search. Adding `--no-reflogs` to [`git-fsck(1)`][3] can -help make these objects more visible: - - ± git fsck --unreachable --no-reflogs - unreachable commit 00fc0164a78fe6b46e56781d434fdbb893f11534 - unreachable blob 18a484273f75e4a3dcac75cb5229a614f6090be0 - unreachable commit 1cdc30ebd6ebbaba4a8c28fb35457a8d5cb4326f - unreachable blob 27c4af632030e3d794181024fba120c6db44eef5 - unreachable commit 31a0e98166bc48bf1f725a657e27632c99568da0 - unreachable commit 34bc98ae27f3db69df82b186cf2ef8a86b42ea12 - unreachable commit 8f08be163f185dd130a86d67daf61639632c4e20 - unreachable commit bf8836f2e435ee241ebe53f0eae4ee98bd887082 - unreachable commit 06414a75d58cee81fb2035b8af45a543c6bb09ef - unreachable blob 1f853af2881919bc62321b536bfc0de6e9602db6 - unreachable blob 20c1e21948ab5d9553c11fa8a7230d73055c207e - unreachable commit 54cd8b9b5c58409ce3f509e74d5a7a7ac4a73309 - unreachable commit a9693871e765355b6d9a57a612a76f454b177da0 - unreachable commit ad45856329ff97bd35ac17325952c21e53d51b28 - unreachable blob b8154e42d08b74ae6b9817e12b7764d55760c86e - unreachable commit cb599620e2d364e2ab44ada45f16df05c5fe3f51 - unreachable commit e859353ddc681177141d84a0053b9b8ecad1151e - unreachable blob fed50bb1d7c749767de7589cc8ef0acf8caf8226 - unreachable blob 056a7e48130d8d22227367ae9753cb5c9afe2d39 - unreachable commit 16f6063abde9dcd8279fb2a7ddd4998aaf44acc7 - unreachable commit 54def8ee3ea0c7043767185e5900480d24ddb351 - unreachable commit 65d2a1553e3c1dd745afa318135a5957e50dd6ef - unreachable commit 741afdc2f13e76bd0c48e1df7419b37e57733de3 - unreachable commit 7bb6b449ced0493f2d3cc975157aefa84b082e04 - unreachable commit 7e067ad694538a410f98732ce3052546aadc0240 - unreachable commit 809e9d1f131f54701325357199643505773f5d25 - unreachable blob 8802d6dcac8b14399ca4082987a76be4b179333c - unreachable blob 8b82ffa1eb05ef3306ab62e1120f77a80a887d94 - unreachable commit 9af67536e6852fe928934ba0950809597d73a173 - unreachable blob b23eefdac6b2056e25c748679958179bdbd8f81f - unreachable blob b66ef50f82242ec929141cf3246278c6160e230a - unreachable blob c2fa5a98fe1010a1255f032ba34a612e404c7062 - unreachable blob dd42939b3f6cf542064eb011b74749195c951957 - unreachable commit 07f39952cd161438ff4b208b6cb10b287881db85 - unreachable blob 1c0327c6a73923e932eb4f4bf877f660bd13a7b0 - unreachable commit 41a324739bc3f1d265ecc474c58256e3a4ad4982 - unreachable commit 74671b411e2cf1209bc681f0349e24ef7fe00f19 - unreachable commit 9437cbb0500b22a57a62e2cf0a512b1b56ce6a96 - unreachable commit 9a0f5f8c63c184cd5082f27dbe513b3e683bc1ad - unreachable commit 9b7bc7bf0f01a84621e23bfa02e0a09f63da1747 - unreachable commit bce7c8dbcc56e6935015a5fb2c74224bb8d9f768 - unreachable blob c4131dc6d091b1c16943554fa2396f5d405e8537 - unreachable blob c69782e19aee6d89de4f6bcf9ed14813f72c8c10 - unreachable blob d79fb0b95796290c33d6f3dee004235dad7d8893 - unreachable commit dabb01b3df1371602f3f0689d25359597db54423 - unreachable blob ec2ba85be58685070a44727bc2591b9a32eb6457 - -Using these hashes, one could inspect them using other [familiar tools][4], 
-namely, [`git-show(1)`][5] and [`git-cat-file(1)`][6] to figure out if these -are worth resurrecting or even are in fact the objects we want to resurrect. - -## Resurrection Example ## - -Now that we have some tools, let's examine a situation where a change to the -history was made that needs to be corrected: deleting branch references. - -Let's assume we are working on a topic branch for some new awesome feature. -However, after some developing, we discover this solution might not be worth -pursuing anymore. In a fit of rage of our wasted effort, we dump the branch. - -Perhaps several days go by, and we discover we want to look back at something -we did in that previous branch for some reason or another, but we certainly -don't remember the commit hash of that branch. - -For concreteness, let's create a repository that will demonstrate this problem: - - $ cd $(mktemp -d) - $ git init foobar - $ cd foobar - ± touch foo - ± git add foo - ± git commit -m 'initial commit' - ± touch bar - ± git add bar - ± git commit -m 'add bar' - ± git log --oneline - 1cf706a add bar - 11d3501 initial commit - -> I created this example repository in a temporary directory because it's not -> likely to be useful after the demonstration of this problem. Feel free to -> create the repository wherever you please, provided you are following along. - -From here, we may decide to branch and start working on our epic topic branch: - - ± git checkout -b topic/epic_feature - ± echo 1 >> foo - ± git commit -am 'update foo: add 1' - ± echo 2 >> bar - ± git commit -am 'update bar: add 2' - ± touch foobar - ± git add foobar - ± git commit -m 'add foobar' - ± git log --oneline - 2e0bcc6 add foobar - f2239ca update bar: add 2 - 32d8e6d update foo: add 1 - 1cf706a add bar - 11d3501 initial commit - -From here, we decide that the `topic/epic_feature` branch is going anywhere but -the `master` branch. Therefore, we, swiftly, dump it into the ether: - - ± git checkout master - Switch to branch 'master' - ± git branch -D topic/epic_foobar - Deleted branch topic/epic_feature (was 2e0bcc6). - -Several days pass, we perform other commits on other branches, merge them into -`master`, decide on some other things to work on. But eventually, we are -reminded that our old `topic/epic_feature` branch had something similar to what -we are doing now. It would be nice to recover it and its changes for -examination. However, we likely lost the commit hash of the branch. - -### Solution ### - -If we take a quick look at our `git-fsck` output, we might see something that -may lead us to our commit hash: - - ± git fsck - Checking object directories: 100% (256/256), done. - -Well, that was less than helpful. What happened? Turns out, as mentioned -above, `git-fsck` considers objects "reachable" if they are pointed to by a -reference _or_ are in the reflog. Let's add the `--no-reflogs` flag: - - ± git fsck --no-reflogs - Checking object directories: 100% (256/256), done. - dangling commit 2e0bcc62122f2d7bf895958ac8fed1ec05d4d904 - -This looks more promising! Let's checkout this hash and inspect it: - - ± git checkout 2e0bcc62122f2d7bf895958ac8fed1ec05d4d904 - - Note: checking out '2e0bcc62122f2d7bf895958ac8fed1ec05d4d904'. - - You are in 'detached HEAD' state. You can look around, make experimental - changes and commit them, and you can discard any commits you make in this - state without impacting any branches by performing another checkout. 
- - If you want to create a new branch to retain commits you create, you may - do so (now or later) by using -b with the checkout command again. Example: - - git checkout -b - - HEAD is now at 2e0bcc6... add foobar - ± git log --oneline - 2e0bcc6 add foobar - f2239ca update bar: add 2 - 32d8e6d update foo: add 1 - 1cf706a add bar - 11d3501 initial commit - -This indeed looks like the branch we created (several days) before. Git's -interface, as a helpful reminder, explains to us how to (re)create this point -as a reference (branch). It is, thus, our choice to examine the branch as-is, -or recreate the reference for later inspection. - -## Another Resurrection Example ## - -For another example, let's examine when we create a branch and change the -parent commit of the branch point. - -We will start with some commands that create and initialize the repository into -an initial state, that is, before any mistakes are made: - - $ cd $(mktemp -d) - $ git init foobar - $ cd foobar - ± touch foo - ± git add foo - ± git commit -m 'initial commit' - ± touch bar - ± git add bar - ± git commit -m 'add bar' - ± echo 1 >> foo - ± git commit -am 'update foo: add 1' - ± git checkout -b topic/foobar - ± echo 1 >> bar - ± git commit -am 'update bar: add 1' - -> Notice, again, I've created this repository in a temporary directory for my -> own system's tidyness. Futhermore, note `mktemp -d` will create a _different_ -> temporary directory. As such, the `foobar` project from this example and the -> previous example _will_ be different. - -From here, our one line log should look similar to the following: - - ± git log --oneline - 3de2659 update bar: add 1 - 5e6dd5f update foo: add 1 - 9640abb add bar - 31d2347 initial commit - -Furthermore, here is an image that describes the state of the repository. - -{{< figure src="/media/git-repo-state-1.svg" - alt="Example Repository State 1" >}} - -Next, we will create a few more commits, but instead of doing things properly, -we are going to (intentionally) make a mistake. We will merge our -`topic/foobar` branch into `master`, create a new file, `foobar`, and create a -branch, `topic/bad`, from `topic/foobar`. In the `topic/bad` branch, we will -create some new commits, but then we will squash the _two previous_ commits. - -Let's begin issuing commands against our repository: - - ± git checkout master - ± git merge --ff-only topic/foobar - ± touch foobar - ± git add foobar - ± git commit -m 'add foobar' - ± git checkout -b topic/bad topic/foobar - ± echo 2 >> foo - ± git commit -am 'update foo: add 2' - ± echo 2 >> bar - ± git commit -am 'update bar: add 2' - -Thusly, our repository should look similar to the following image: - -{{< figure src="/media/git-repo-state-2.svg" - alt="Example Repository State 2" >}} - -Now, for the mistake: - - ± git rebase -i HEAD~3 - (squash the previous commits) - pick 3de26 - squash 4babf - squash 7647f - -This should result in a repository that looks like the following: - -{{< figure src="/media/git-repo-state-3.svg" - alt="Example Repository State 3" >}} - -Assuming we didn't recognize the mistake, we might attempt to merge the branch: - - ± git checkout master - ± git merge --ff-only topic/bad - fatal: Not possible to fast-forward, aborting. - -Well, of course, the `master` branch is ahead by one commit, and the -`topic/bad` branch is "behind" by two. 
- -We can see this be viewing the logs when going from `master` to `topic/bad` and -then vice-versa: - - ± git log --oneline master..topic/bad - 3b71666 update bar: add 1 - ± git log --oneline topic/bad..master - 7387d60 add foobar - 3de2659 update bar: add 1 - -But another issue emerges from viewing these log outputs from our mistake -ignorant brains: two of the commits look the same, e.g., have the same commit -message. - -Not only have we combined two of our changes from `topic/bad` but we combined -them with a commit that was _already_ merged into the `master` branch. Assuming -`master` is a stable and "branchable" branch, we will not be able to simply -rebase one way and return, the commits are too intermingled. - -> Branchable, in this context, means the branch is safe to base work, no one on -> our team (or ourselves, if we practice proper discipline) will come behind us -> and change the history of this branch. This is an important assumption in -> _any_ distributed workflow. Every project should have (at least) one -> "branchable" reference, many choose this to be the `master` branch. - -### Solutions ### - -One way we can fix this is to simply not care. But that's not what we are -about: we like clean history, this situation and such a solution is clearly not -clean! - -Therefore, we will have to return the `topic/bad` branch to a clean state -before continuing with merging the work done in the branch. - -Let's start with examining the reflog: - - ± git reflog - 7387d60 HEAD@{0}: checkout: moving from topic/bad to master - 3b71666 HEAD@{1}: rebase -i (finish): returning to refs/heads/topic/bad - 3b71666 HEAD@{2}: rebase -i (fixup): update bar: add 1 - 4cc10e9 HEAD@{3}: rebase -i (fixup): # This is a combination of 2 commits. - 3de2659 HEAD@{4}: rebase -i (start): checkout HEAD~3 - 7647f9c HEAD@{5}: commit: update bar: add 2 - 4babfe7 HEAD@{5}: commit: update foo: add 2 - 3de2659 HEAD@{6}: checkout: moving from master to topic/bad - 7387d60 HEAD@{7}: commit: add foobar - 3de2659 HEAD@{8}: checkout: moving from topic/bad to master - 3de2659 HEAD@{9}: checkout: moving from master to topic/bad - 3de2659 HEAD@{10}: merge topic/foobar: Fast-forward - 5e6dd5f HEAD@{11}: checkout: moving from topic/foobar to master - 3de2659 HEAD@{12}: commit: update bar: add 1 - 5e6dd5f HEAD@{13}: checkout: moving from master to topic/foobar - 5e6dd5f HEAD@{14}: commit: update foo: add 1 - 9640abb HEAD@{15}: commit: add bar - 31d2347 HEAD@{16}: commit (initial): initial commit - -Examining `HEAD@{5}` we will see the commit of `topic/bad` _before_ we -attempted to rebase the three commits. If we start there, we may be able to -salvage the history. - - ± git checkout topic/bad - ± git reset --hard 7647f9c - ± git log --oneline - 7647f9c update bar: add 2 - 4babfe7 update foo: add 2 - 3de2659 update bar: add 1 - 5e6dd5f update foo: add 1 - 9640abb add bar - 31d2347 initial commit - -> Obligatory notice, blindly using `git reset --hard` can lead to dark, scary -> places. As with the first example in this post, `git reset --hard` is an even -> more subtle way to lose commits. Pause before pressing enter _everytime_ you -> type `git reset --hard`. - -Perfect, we are back to the state of the branch as seen in the following image: - -{{< figure src="/media/git-repo-state-2.svg" - alt="Example Repository State Before Mistake" >}} - -From here, we can merge the two branches however we please: rebase and -fast-forward or regular old merge commits. 
- -The first way of merging the two branches may proceed as follows: - - ± git branch - topic/bad - ± git rebase master - First, rewinding head to replay your work on top of it... - Applying: update foo: add 2 - Applying: update bar: add 2 - ± git checkout master - Switched to branch 'master' - ± git merge --ff-only topic/bad - Updating 7387d60..577aa0b - Fast-forward - bar | 1 + - foo | 1 + - 2 files changed, 2 insertions(+) - -Afterwards, our repository will look like the following figure: - -{{< figure src="/media/git-repo-state-4.svg" - alt="Example Repository State After Rebase Fast-Forward Merge" >}} - -> If we wanted to rebase the two commits from `topic/bad` together, we could -> have easily done so _right_ before switching to the `master` branch. - -Proceeding with a regular merge commit would proceed similar to the following: - - ± git checkout master - Switched to branch 'master' - ± git merge --no-ff topic/bad -m 'merge branch "topic/bad"' - Merge made by the 'recursive' strategy. - bar | 1 + - foo | 1 + - 2 files changed, 2 insertions(+) - -Afterwards, our repository will look like the following figure: - -{{< figure src="/media/git-repo-state-5.svg" - alt="Example Repository State After Merge Commit" >}} - -## Summary ## - -The best way to fix Git repository history is not to make mistakes in the first -place. However, since mistakes are inevitable, we must learn the tools to -discover, recover, and return to the appropriate state to correct our mistakes. -More importantly, we must learn the courage to make mistakes, knowing we have -an escape route. - -This way, we can avoid keeping around a `git.txt` file ([xkcd][9]) when our -repository eventually melts down. - -## References ## - -* [`git-reflog(1)`][1] - -* [Git SCM book, Internals Chapter][2] - -* [`git-fsck(1)`][3] - -* [Git in Reverse][4] - -* [`git-show(1)`][5] - -* [`git-cat-file(1)`][6] - -* [`git-reset(1)`][7] - -* [`git-rebase(1)`][8] - -* [XKCD: Git][9] - -[1]: https://www.kernel.org/pub/software/scm/git/docs/git-reflog.html - -[2]: https://git-scm.com/book/en/v2/Git-Internals-Git-References - -[3]: https://www.kernel.org/pub/software/scm/git/docs/git-fsck.html - -[4]: https://kennyballou.com/blog/2016/01/git-in-reverse/ - -[5]: https://www.kernel.org/pub/software/scm/git/docs/git-show.html - -[6]: https://www.kernel.org/pub/software/scm/git/docs/git-cat-file.html - -[7]: https://www.kernel.org/pub/software/scm/git/docs/git-reset.html - -[8]: https://www.kernel.org/pub/software/scm/git/docs/git-rebase.html - -[9]: https://xkcd.com/1597 diff --git a/blag/content/blog/hunk_editing.markdown b/blag/content/blog/hunk_editing.markdown deleted file mode 100644 index 314aa83..0000000 --- a/blag/content/blog/hunk_editing.markdown +++ /dev/null @@ -1,282 +0,0 @@ ---- -title: "The Art of Manually Editing Hunks" -description: "How to edit hunk diffs" -tags: - - "Git" - - "How-to" - - "Tips and Tricks" -date: "2015-10-24" -updated: "2015-10-24" -categories: - - "Development" -slug: "art-manually-edit-hunks" ---- - -There's a certain art to editing hunks, seemingly arcane. Hunks are blocks of -changes typically found in unified diff patch files, or, more commonly today, -found in Git patches. - -Git uses its own variant of the [unified diff format][1], but it isn't much -different. The differences between the unified format and Git's are usually not -significant. The patch files created with [`git-show`][4] or [`git-diff`][2] -are consumable by the usual tools, `patch`, `git`, `vimdiff`, etc. 
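-
-For instance, a patch written out with `git diff` can be checked and applied
-by either `git` or `patch` (the output file name here is arbitrary):
-
-    % git diff > /tmp/example.patch
-    % git apply --check /tmp/example.patch
-    % patch -p1 --dry-run < /tmp/example.patch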
- -## Short Introduction to Unified Diff ## - -A unified diff may look something similar to (freely copied from the -`diffutils` manual): - - --- lao 2002-02-21 23:30:39.942229878 -0800 - +++ tzu 2002-02-21 23:30:50.442260588 -0800 - @@ -1,7 +1,6 @@ - -The Way that can be told of is not the eternal Way; - -The name that can be named is not the eternal name. - The Nameless is the origin of Heaven and Earth; - -The Named is the mother of all things. - +The named is the mother of all things. - + - Therefore let there always be non-being, - so we may see their subtlety, - And let there always be being, - @@ -9,3 +8,6 @@ - The two are the same, - But after they are produced, - they have different names. - +They both may be called deep and profound. - +Deeper and more profound, - +The door of all subtleties! - -The first two lines define the files that are input into the `diff` program, -the first, `lao`, being the "source" file and the second, `tzu`, being the -"new" file. The starting characters `---` and `+++` denote the lines from each. - -`+` denotes a line that will be added to the first file and `-` denotes a line -that will be removed from the first file. Lines with no changes are preceded by -a single space. - -The `@@ -1,7 +1,6 @@` and `@@ -9,3 +8,6 @@` are the hunk identifiers. That is, -diff hunks are the blocks identified by `@@ -line number[,context] +line -number[, context] @@` in the diff format. The `context` number is optional and -occasionally not needed. However, it is always included in when using -`git-diff`. The line numbers defines the number the hunk begins. The context -number defines the number of lines in the hunk. Unlike the line number, it -often differs between the two files. In the first hunk of the example above, -the context numbers are `7` and `6`, respectively. That is, lines preceded with -a `-` and a space equals 7. Similarly, lines starting with a `+` and a space -equals 6. - -> Lines starting with a space count towards the context of both files. - -Since the second file has a smaller context, this means we are removing more -(by one) lines than we are adding. To `diff`, updating a line is the same as -removing the old line and adding a new line (with the changes). - -Armed with this information, we can start editing hunks that can be cleanly -applied. - -## Motivation ## - -What might be the motivation for even wanting to edit hunk files? The biggest I -see is when using `git-add --patch`. Particularly when the changes run together -and cannot be split apart automatically. We can see this in the diff above. - -The trivial case is being able to stage a single hunk of the above diff, -nothing has to be done to stage the changes separately other than using the -`--patch` option. - -However, staging separate changes inside a hunk becomes slightly more -complicated. Often, if the changes are broken up with a even just a single -line (if it exists), they can be split. When they run together, it becomes more -difficult to do. - -Of course, a way to solve this problem, is to manually back out the changes (a -series of "undos"), save the file, stage it, play back the changes (a series of -"redos", perhaps). This can be very error prone and if you make any other -changes during between undo and redo, you may have lost the changes. Therefore, -being able to manually edit the specific hunk into the right shape, no changes -are lost. 
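-
-As a quick check of the header arithmetic described above, we can count the
-first hunk of the `lao`/`tzu` example by hand:
-
-    @@ -1,7 +1,6 @@
-    old file (' ' and '-' lines): 4 context + 3 removed = 7
-    new file (' ' and '+' lines): 4 context + 2 added   = 6
-
-Both counts agree with the header; this is the same bookkeeping `git` and
-`patch` use to validate a hunk, which is why the header matters when editing
-hunks by hand later on.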
- -## Hunk Editing Example ## - -Let's walk through an example of staging some changes, and manually editing a -hunk to stage them into the patches we want. - -Create a temporary Git repository, this will be a just some basic stuff for -testing. - - % cd /tmp - % git init foo - % cd foo - -> From here on, we will assume the working directory to be `/tmp/foo`. - -Inside this new Git repository, add a new file, `quicksort.exs`: - - defmodule Quicksort do - - def sort(list) do - _sort(list) - end - - defp _sort([]), do: [] - defp _sort(list = [h|t]) do - _sort(Enum.filter(list, &(&1 < h))) ++ [h] ++ _sort(Enum.filter(list, &(&1 > h))) - end - - end - -Perform the usual actions, `git-add` and `git-commit`: - - % git add quicksort.exs - % git commit -m 'initial commit' - -Now, let's make some changes. For one, there's compiler warning about the -unused variable `t` and the actually sorting seems a bit dense. Let's fix the -warning and breakup the sorting: - - defmodule Quicksort do - - def sort(list) do - _sort(list) - end - - defp _sort([]), do: [] - defp _sort(list = [h|_]) do - (list |> Enum.filter(&(&1 < h)) |> _sort) - ++ [h] ++ - (list |> Enum.filter(&(&1 > h)) |> _sort) - end - - end - -Saving this version of the file should produce a diff similar to the following: - - diff --git a/quicksort.exs b/quicksort.exs - index 97b60b4..ed2446b 100644 - --- a/quicksort.exs - +++ b/quicksort.exs - @@ -5,8 +5,10 @@ defmodule Quicksort do - end - - defp _sort([]), do: [] - - defp _sort(list = [h|t]) do - - _sort(Enum.filter(list, &(&1 < h))) ++ [h] ++ _sort(Enum.filter(list, &(&1 > h))) - + defp _sort(list = [h|_]) do - + (list |> Enum.filter(&(&1 < h)) |> _sort) - + ++ [h] ++ - + (list |> Enum.filter(&(&1 > h)) |> _sort) - end - - end - -However, since these changes are actually, argubly, two different changes, they -should live in two commits. Let's stage the change for `t` to `_`: - - % git add --patch - -We will be presented with the diff from before: - - diff --git a/quicksort.exs b/quicksort.exs - index 97b60b4..ed2446b 100644 - --- a/quicksort.exs - +++ b/quicksort.exs - @@ -5,8 +5,10 @@ defmodule Quicksort do - end - - defp _sort([]), do: [] - - defp _sort(list = [h|t]) do - - _sort(Enum.filter(list, &(&1 < h))) ++ [h] ++ _sort(Enum.filter(list, &(&1 > h))) - + defp _sort(list = [h|_]) do - + (list |> Enum.filter(&(&1 < h)) |> _sort) - + ++ [h] ++ - + (list |> Enum.filter(&(&1 > h)) |> _sort) - end - - end - Stage this hunk [y,n,q,a,d,/,e,?]? - -First thing we want to try is using the `split(s)` option. However, this is an -invalid choice because Git does not know how to split this hunk and we will be -presented with the available options and the hunk again. The option we then -want is `edit(e)`. - -We will be dropped into our default editor, environment variable `$EDITOR`, Git -`core.editor` setting. From there, we will be presented with something of the -following: - - # Manual hunk edit mode -- see bottom for a quick guide - @@ -5,8 +5,10 @@ defmodule Quicksort do - end - - defp _sort([]), do: [] - - defp _sort(list = [h|t]) do - - _sort(Enum.filter(list, &(&1 < h))) ++ [h] ++ _sort(Enum.filter(list, &(&1 > h))) - + defp _sort(list = [h|_]) do - + (list |> Enum.filter(&(&1 < h)) |> _sort) - + ++ [h] ++ - + (list |> Enum.filter(&(&1 > h)) |> _sort) - end - - end - # --- - # To remove '-' lines, make them ' ' lines (context). - # To remove '+' lines, delete them. - # Lines starting with # will be removed. 
- # - # If the patch applies cleanly, the edited hunk will immediately be - # marked for staging. If it does not apply cleanly, you will be given - # an opportunity to edit again. If all lines of the hunk are removed, - # then the edit is aborted and the hunk is left unchanged. - -From here, we want to replace the leading minus of the change removal to a -space and remove the last three additions. - -That is, we want the diff to look like: - - @@ -5,8 +5,10 @@ defmodule Quicksort do - end - - defp _sort([]), do: [] - - defp _sort(list = [h|t]) do - sort(Enum.filter(list, &(&1 < h))) ++ [h] ++ _sort(Enum.filter(list, &(&1 > h))) - + defp _sort(list = [h|_]) do - end - - end - -Saving and closing the editor now, Git will have staged the desired diff. We -can check the staged changes via `git-diff`: - - % git diff --cached - diff --git a/quicksort.exs b/quicksort.exs - index 97b60b4..94a5101 100644 - --- a/quicksort.exs - +++ b/quicksort.exs - @@ -5,8 +5,8 @@ defmodule Quicksort do - end - - defp _sort([]), do: [] - - defp _sort(list = [h|t]) do - _sort(Enum.filter(list, &(&1 < h))) ++ [h] ++ _sort(Enum.filter(list, &(&1 > h))) - + defp _sort(list = [h|_]) do - end - - end - -Notice, the hunk context data was updated correctly to match the new changes. - -From here, commit the first change, and then add and commit the second change. - -Something to watch out for is overzealously removing changed lines. For -example, in Elixir quicksort example we have just did, if we entirely removed -the second `-` from the diff _and_ manually updated the hunk header, the patch -will never apply cleanly. Therefore, be especially careful with removing `-` -lines. - -[1]: https://www.gnu.org/software/diffutils/manual/html_node/Unified-Format.html - -[2]: https://www.kernel.org/pub/software/scm/git/docs/git-diff.html - -[3]: https://www.gnu.org/licenses/fdl.html - -[4]: https://www.kernel.org/pub/software/scm/git/docs/git-show.html diff --git a/blag/content/blog/vim_tips_2015_03_17.markdown b/blag/content/blog/vim_tips_2015_03_17.markdown deleted file mode 100644 index 7bdb57d..0000000 --- a/blag/content/blog/vim_tips_2015_03_17.markdown +++ /dev/null @@ -1,85 +0,0 @@ ---- -title: "Vim Tips 2015-03-17" -description: "First Vim tip post of a series of unknown size" -tags: - - "Vim" - - "Tips and Tricks" - - "Editors" -date: "2015-03-17" -categories: - - "Development" - - "Editors" - - "Tools" -slug: "vim-tips-2015-03-17" ---- - -This is the start of a series of posts about better Vim usage. It's yet to be -determined how often and how frequent this will run, but expect more than just -this first post. - -## Folds ## - -When using manual folding, creating a fold in Vim is as easy as one command: - -In normal mode, `zf`. - -For example, while in Java code and on a Method opening curly brace, type -`zf%`. - -And a fold is now created around that method. You can toggle it open and closed -with `za`, you can also open it with `zo` and you can close it with `zc`. - -Similarly, to remove a fold created by `zf`, use `zd` to remove -folds. - -From the above example, while in normal mode, typing `zd%` will remove the fold -for the method's block. - -For more information about folding and the other modes, visit the [Vim wiki -page][2] on folding. 
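-
-A few more stock fold commands pair well with the ones above (these are
-standard Vim bindings, not specific to any configuration):
-
-    zR    open every fold in the window
-    zM    close every fold in the window
-    zj    move to the start of the next fold
-    zk    move to the end of the previous fold
-    zE    eliminate all manually created folds in the window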
-
-## Substitution Range ##
-
-Here are some more explicit examples of the range options provided when doing
-substitutions:
-
-To substitute the first occurrence of the pattern in the current line:
-
-    :s/foo/bar
-
-All occurrences:
-
-    :s/foo/bar/g
-
-Entire file, first occurrence:
-
-    :%s/foo/bar/
-
-Entire file, all occurrences:
-
-    :%s/foo/bar/g
-
-Now for something completely different, specific ranges:
-
-    :<start>,<end>s/foo/bar[/g]
-
-And today's kicker: changing from a given line to the end of the file:
-
-    :<line>,$s/foo/bar[/g]
-
-Visit [Vim Ranges][3] to view more information about the ranges available in
-Vim.
-
-## References ##
-
-[1]: http://zzapper.co.uk/vimtips.html
-
-* [Vim Tips][1]
-
-[2]: http://vim.wikia.com/wiki/Folding
-
-* [Vim Wikia: Folding][2]
-
-[3]: http://vim.wikia.com/wiki/Ranges
-
-* [Vim Wikia: Ranges][3]
diff --git a/blag/content/blog/vim_tips_2015_05_07.markdown b/blag/content/blog/vim_tips_2015_05_07.markdown
deleted file mode 100644
index a00c2b6..0000000
--- a/blag/content/blog/vim_tips_2015_05_07.markdown
+++ /dev/null
@@ -1,141 +0,0 @@
----
-title: "Vim Tips 2015-05-07"
-description: "Vim Tips: Visual Mode and Macros"
-tags:
-  - "Vim"
-  - "Tips and Tricks"
-  - "Editors"
-date: "2015-05-07"
-categories:
-  - "Development"
-  - "Editors"
-  - "Tools"
-slug: "vim-tips-2015-05-7"
----
-
-Many Vim users may have, accidentally or not, discovered the dot (`.`) command
-in Vim. It's a mainstay for a lot of Vim users and is clearly one of those
-actions that should be in more editors. Except when it is the wrong action for
-the job.
-
-More often than not, the visual selection mode and one-off macros are a better
-choice.
-
-## Visual Mode ##
-
-I won't go into all of the cool things that can be accomplished with Vim's
-visual mode, but I will showcase a few examples where visual mode is clearly a
-better choice than the dot (`.`).
-
-Visual mode offers, in essence, a multi-line cursor with which you can make a
-lot of changes, quickly.
-
-### Visual Mode Basics ###
-
-To enter (block-wise) visual mode, it is as simple as pressing `^v` or
-`ctrl-v`. Next you will want to select what you want to change with your
-typical movement commands (`h`, `j`, `k`, `l`, and of course `w`, `e` and all
-the rest). Finally, you finish with the action: `I` if you want to insert
-before the selection, `A` if you want to append after the selection, `C` if
-you want to change the selection, and `d`, `D`, `x` if you want to remove the
-selection, just to name a few.
-
-### Some Examples ###
-
-For (a bad) example, if you need to comment out a contiguous set of lines, you
-can easily accomplish this with visual mode.
-
-{{< video "/media/videos/comment.ogg" "video/ogg" 600 400 >}}
-
-A related example to prefixing is indentation changes: I often use visual mode
-to fix code indentation when changing block-level indentation or when copying
-code into a markdown file.
-
-{{< video "/media/videos/indent.ogg" "video/ogg" 600 400 >}}
-
-As another example, if you need to change a single word in multiple columns,
-visual mode will make easy work of this (especially when the columns are
-aligned; if not, see macros below or [substitution ranges][2] from the
-previous tip).
-
-{{< video "/media/videos/cw.ogg" "video/ogg" 600 400 >}}
-
-For more information on Visual Mode, you can check Vim's [visual][1] help
-document.
-
-## Macros ##
-
-For when visual mode may not be enough or when the change is repetitive in
-operations but not in columns or what have you, it's time to bust out the
-macros. Vim macros are always close at hand: you can use any of the registers
-to record and store a macro.
-
-### Macro Basics ###
-
-To record a macro, it's as simple as `q<register>{commands}q`. That is, press
-`q`, select a register (a-z, 0-9), enter your commands as if you were using
-Vim normally, and finally press `q` again to finish. Once your macro is
-recorded, you can use it with `@<register>`. And, like most Vim commands, you
-can attach a repetition to it: `<n>@<register>`, where `<n>` is the number of
-times to repeat the command.
-
-You can also replay the last macro with `@@`.
-
-### Some Examples ###
-
-As a simplistic example, we can use a macro to convert some plain data into,
-say, JSON (this example is clearly taken from the [Vim Wikia][3]).
-
-Let's say we have the following data:
-
-    one first example
-    two second example
-    three third example
-    four fourth example
-
-And we want to change it to the following:
-
-    data = {
-        'one': 'first example',
-        'two': 'second example',
-        'three': 'third example',
-        'four': 'fourth example',
-    }
-
-We can do this by performing the following:
-
-First, we want to start recording our macro. While the cursor is under the 'o'
-of 'one', we will press `qd` to record our macro to the `d` register.
-
-Next, we will want to change the tabbing by performing a substitution:
-
-    :s/\s\+/': '
-
-Then, we will insert our first tick with:
-
-    I'
-
-And append the last tick and comma with:
-
-    A',
-
-Before we finish recording, one of the more important operations for making
-macros repeatable is moving the cursor to the next line and putting it into
-the correct position for the next execution. Therefore, move the cursor to the
-beginning of the line and move down one line:
-
-    0j
-
-Finally, press `q` to finish recording.
-
-We should now be able to press `3@d` and watch as the rest of the lines
-change.
-
-To finish up the example, we'll manually enter `data = {` and the trailing
-`}`.
-
-{{< video "/media/videos/macros.ogg" "video/ogg" 600 400 >}}
-
-[1]: http://vimdoc.sourceforge.net/htmldoc/visual.html
-
-[2]: https://kennyballou.com/blog/2015/03/vim-tips-2015-03-17/
-
-[3]: http://vim.wikia.com/wiki/Macros
diff --git a/blag/layouts/404.html b/blag/layouts/404.html
deleted file mode 100644
index 9e680c9..0000000
--- a/blag/layouts/404.html
+++ /dev/null
@@ -1,7 +0,0 @@
-{{ partial "header.html" . }}
-
diff --git a/blag/layouts/404.html b/blag/layouts/404.html deleted file mode 100644 index 9e680c9..0000000 --- a/blag/layouts/404.html +++ /dev/null @@ -1,7 +0,0 @@ -{{ partial "header.html" . }} - -
You Seem Lost...

-

The page you were looking for seems to have left the server or never -existed. If you think this may be an error, sorry. The web server thinks you -have an error.

-{{ partial "footer.html" . }} diff --git a/blag/layouts/_default/list.html b/blag/layouts/_default/list.html deleted file mode 100644 index e69de29..0000000 diff --git a/blag/layouts/_default/single.html b/blag/layouts/_default/single.html deleted file mode 100644 index 73f61b8..0000000 --- a/blag/layouts/_default/single.html +++ /dev/null @@ -1,11 +0,0 @@ -{{ partial "header.html" . }} - -{{ partial "subheader.html" . }} -
-

{{ .Title }}

-
- {{ .Content}} -
-
- -{{ partial "footer.html" . }} diff --git a/blag/layouts/blog/li.html b/blag/layouts/blog/li.html deleted file mode 100644 index e69de29..0000000 diff --git a/blag/layouts/blog/single.html b/blag/layouts/blog/single.html deleted file mode 100644 index f07be03..0000000 --- a/blag/layouts/blog/single.html +++ /dev/null @@ -1,17 +0,0 @@ -{{ partial "header.html" . }} - -{{ partial "subheader.html" . }} -
-

{{ .Title }}

- - {{ .Content}} -
-{{ partial "footer.html" . }} diff --git a/blag/layouts/blog/summary.html b/blag/layouts/blog/summary.html deleted file mode 100644 index b2c4925..0000000 --- a/blag/layouts/blog/summary.html +++ /dev/null @@ -1,20 +0,0 @@ -
-
-

{{ .Title }}

- -
- -
- {{ .Summary }} -
-
    -
  • - {{ range .Params.tags }} -
  • {{ . }}
  • - {{ end }} -
- - -
diff --git a/blag/layouts/index.html b/blag/layouts/index.html deleted file mode 100644 index aa4f49d..0000000 --- a/blag/layouts/index.html +++ /dev/null @@ -1,9 +0,0 @@ -{{ partial "header.html" . }} - -{{ partial "subheader.html" . }} -{{ range .Data.Pages }} - {{ if eq .Section "blog" }} - {{ .Render "summary" }} - {{ end }} -{{ end }} -{{ partial "footer.html" . }} diff --git a/blag/layouts/partials/footer.html b/blag/layouts/partials/footer.html deleted file mode 100644 index ce176f9..0000000 --- a/blag/layouts/partials/footer.html +++ /dev/null @@ -1,14 +0,0 @@ - - - diff --git a/blag/layouts/partials/head_includes.html b/blag/layouts/partials/head_includes.html deleted file mode 100644 index 90d1f49..0000000 --- a/blag/layouts/partials/head_includes.html +++ /dev/null @@ -1,3 +0,0 @@ - - diff --git a/blag/layouts/partials/header.html b/blag/layouts/partials/header.html deleted file mode 100644 index 329debc..0000000 --- a/blag/layouts/partials/header.html +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - {{ partial "meta.html" . }} - - - {{ .Site.Title }} - - - {{ if .RSSlink }} - - {{ end }} - - {{ partial "head_includes.html" . }} - - - diff --git a/blag/layouts/partials/meta.html b/blag/layouts/partials/meta.html deleted file mode 100644 index ec09d41..0000000 --- a/blag/layouts/partials/meta.html +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/blag/layouts/partials/subheader.html b/blag/layouts/partials/subheader.html deleted file mode 100644 index 62dcc29..0000000 --- a/blag/layouts/partials/subheader.html +++ /dev/null @@ -1,43 +0,0 @@ - diff --git a/blag/layouts/rss.xml b/blag/layouts/rss.xml deleted file mode 100644 index 5950804..0000000 --- a/blag/layouts/rss.xml +++ /dev/null @@ -1,21 +0,0 @@ - - - {{ .Title }} on {{ .Site.Title }} - Hugo - {{ .Permalink }} - {{ with .Site.LanguageCode }}{{ . }}{{ end }} - {{ with .Site.Author.name }}{{ . }}{{ end }} - {{ with .Site.Copyright }}{{ . }}{{ end }} - {{ .Date.Format "Mon, 02 Jan 2006 15:04:05 MST" }} - {{ range first 15 .Data.Pages }} - - {{ .Title }} - {{ .Permalink }} - {{ .Date.Format "Mon, 02 Jan 2006 15:04:05 MST" }} - {{ with .Site.Author.name }}{{ . }}{{ end }} - {{ .Permalink }} - {{ .Content | html }} - - {{ end }} - - diff --git a/blag/layouts/shortcodes/video.html b/blag/layouts/shortcodes/video.html deleted file mode 100644 index f664012..0000000 --- a/blag/layouts/shortcodes/video.html +++ /dev/null @@ -1,6 +0,0 @@ -
- -
diff --git a/blag/layouts/single.html b/blag/layouts/single.html deleted file mode 100644 index 5c0336c..0000000 --- a/blag/layouts/single.html +++ /dev/null @@ -1,5 +0,0 @@ -{{ partial "header.html" . }} - - {{ .Content}} - -{{ partial "header.html" . }} diff --git a/blag/layouts/sitemap.xml b/blag/layouts/sitemap.xml deleted file mode 100644 index 7f86e0c..0000000 --- a/blag/layouts/sitemap.xml +++ /dev/null @@ -1,16 +0,0 @@ - - {{ range .Data.Pages }} - - {{ .Permalink }} - - {{ safeHTML ( .Date.Format "2006-01-02T15:04:05-7:00" ) }} - - {{ with .Sitemap.ChangeFreq }} - {{ . }} - {{ end }} - {{ if ge .Sitemap.Priority 0.0 }} - {{ .Sitemap.Priority }} - {{ end }} - - {{ end }} - diff --git a/blag/static/css/site.css b/blag/static/css/site.css deleted file mode 100644 index c854321..0000000 --- a/blag/static/css/site.css +++ /dev/null @@ -1,130 +0,0 @@ -html, body { - box-sizing: border-box; - font: 13px Helvetica, Arial; - margin-left: auto; - margin-right: auto; - margin-top: auto; - margin-bottom: -2.5em; - width: 80%; - background-color: #FDF6E3; - color: #586E75; - height: 94%; -} -a { - color: #268BD2; - text-decoration: none; -} -a:hover { - text-decoration: underline; -} -blockquote { - padding: 1em 1em; - border-left: 5px solid #D33682; - margin: 0 0 1em; -} -.post-meta { - font-style: italic; -} -#header { - display: block; - height: 2em; - overflow: visible; - padding-right: 2em; - padding-left: 0.7em; - margin: auto; -} -#footer { - height: 2.5em; - padding: 1em 0; - margin-left: 2em; - border-width: 1px 0 0: -} -#header header a { - font-size: 210%; -} -#header header a:hover { - text-decoration: none; -} -#content { - min-height: 100%; - margin-bottom: -2.5em; -} -#content:after { - content: ""; - display: block; - height: 2.5em; -} -.fade { - opacity: 0.5; - transition: opacity .25s ease-in-out; - -moz-transition: opacity: .25s ease-in-out; - -webkit-transition: opacity .25s ease-in-out; -} -.fade:hover { - opacity: 1; -} -pre { - padding: 9.5px; - margin: 0 0 10px; - word-break: break-all; - word-wrap: break-word; - color: #2b2b2b; - background-color: #073642; - border: 1px solid #073642; - border-radius: 4px; -} -code { - padding: 0.25em; - font-family: DejaVu Sans Mono, Consolas; - white-space: pre-wrap; - border: 0; - border-radius: 4px; -} -pre code { - display: block; - color: #657B83; - background-color: #002B36; -} -.tags { - display: inline-block; - list-style: none; - padding-left: 0; - margin: 0 0 0 0.2em; -} -.tags li { - display: inline-block; - padding-left: 0.3em; -} -.tags li:nth-child(even) { - color: #6C71C4; -} -.colleft { - float: left; - width: 70%; - position: relative; -} -.colright { - float: right; - width: 30%; - position: relative; -} -.embed-video { - width: 75%; - margin-left: auto; - margin-right: auto; -} -figure { - width: 75%; - text-align: center; - margin: auto; -} -::-moz-selection { - background:#FF5E99; - color:#FFFFFF; - text-shadow:none -} -::selection { - background:#FF5E99; - color:#FFFFFF; - text-shadow:none -} diff --git a/blag/static/favicon.ico b/blag/static/favicon.ico deleted file mode 100644 index bd80b91..0000000 Binary files a/blag/static/favicon.ico and /dev/null differ diff --git a/blag/static/media/SentimentAnalysisTopology.png b/blag/static/media/SentimentAnalysisTopology.png deleted file mode 100644 index 44d8ede..0000000 Binary files a/blag/static/media/SentimentAnalysisTopology.png and /dev/null differ diff --git a/blag/static/media/code-branching.png b/blag/static/media/code-branching.png deleted file mode 100644 
index 9c7e803..0000000 Binary files a/blag/static/media/code-branching.png and /dev/null differ diff --git a/blag/static/media/coreboot-x230-1.png b/blag/static/media/coreboot-x230-1.png deleted file mode 100644 index 65c5160..0000000 Binary files a/blag/static/media/coreboot-x230-1.png and /dev/null differ diff --git a/blag/static/media/coreboot-x230-2.png b/blag/static/media/coreboot-x230-2.png deleted file mode 100644 index e2e2406..0000000 Binary files a/blag/static/media/coreboot-x230-2.png and /dev/null differ diff --git a/blag/static/media/coreboot-x230-3.png b/blag/static/media/coreboot-x230-3.png deleted file mode 100644 index 0fa18d4..0000000 Binary files a/blag/static/media/coreboot-x230-3.png and /dev/null differ diff --git a/blag/static/media/git-branching-1.png b/blag/static/media/git-branching-1.png deleted file mode 100644 index d5b5d9e..0000000 Binary files a/blag/static/media/git-branching-1.png and /dev/null differ diff --git a/blag/static/media/git-commit-1.png b/blag/static/media/git-commit-1.png deleted file mode 100644 index 810fb05..0000000 Binary files a/blag/static/media/git-commit-1.png and /dev/null differ diff --git a/blag/static/media/git-ff-merge-1.png b/blag/static/media/git-ff-merge-1.png deleted file mode 100644 index c7dc832..0000000 Binary files a/blag/static/media/git-ff-merge-1.png and /dev/null differ diff --git a/blag/static/media/git-ff-merge-2.png b/blag/static/media/git-ff-merge-2.png deleted file mode 100644 index 04eb63a..0000000 Binary files a/blag/static/media/git-ff-merge-2.png and /dev/null differ diff --git a/blag/static/media/git-repo-state-1.svg b/blag/static/media/git-repo-state-1.svg deleted file mode 100644 index 3a64404..0000000 --- a/blag/static/media/git-repo-state-1.svg +++ /dev/null @@ -1,162 +0,0 @@ [162 lines of SVG markup not shown] diff --git a/blag/static/media/git-repo-state-2.svg b/blag/static/media/git-repo-state-2.svg deleted file mode 100644 index 6eba189..0000000 --- a/blag/static/media/git-repo-state-2.svg +++ /dev/null @@ -1,221 +0,0 @@ [221 lines of SVG markup not shown] diff --git a/blag/static/media/git-repo-state-3.svg b/blag/static/media/git-repo-state-3.svg deleted file mode 100644 index 49c101a..0000000 --- a/blag/static/media/git-repo-state-3.svg +++ /dev/null @@ -1,220 +0,0 @@ [220 lines of SVG markup not shown] diff --git a/blag/static/media/git-repo-state-4.svg b/blag/static/media/git-repo-state-4.svg deleted file mode 100644
index 57c6548..0000000 --- a/blag/static/media/git-repo-state-4.svg +++ /dev/null @@ -1,232 +0,0 @@ [232 lines of SVG markup not shown] diff --git a/blag/static/media/git-repo-state-5.svg b/blag/static/media/git-repo-state-5.svg deleted file mode 100644 index 613c0ec..0000000 --- a/blag/static/media/git-repo-state-5.svg +++ /dev/null @@ -1,249 +0,0 @@ [249 lines of SVG markup not shown] diff --git a/blag/static/media/git-resolve-merge.png b/blag/static/media/git-resolve-merge.png deleted file mode 100644 index 507272f..0000000 Binary files a/blag/static/media/git-resolve-merge.png and /dev/null differ diff --git a/blag/static/media/git-tree-1.png b/blag/static/media/git-tree-1.png deleted file mode 100644 index 81694f4..0000000 Binary files a/blag/static/media/git-tree-1.png and /dev/null differ diff --git a/blag/static/media/git-tree-2.png b/blag/static/media/git-tree-2.png deleted file mode 100644 index b3e06eb..0000000 Binary files a/blag/static/media/git-tree-2.png and /dev/null differ diff --git a/blag/static/media/spark_issues_chart.png b/blag/static/media/spark_issues_chart.png deleted file mode 100644 index 1932741..0000000 Binary files a/blag/static/media/spark_issues_chart.png and /dev/null differ diff --git a/blag/static/media/storm_issues_chart.png b/blag/static/media/storm_issues_chart.png deleted file mode 100644 index 78bc99a..0000000 Binary files a/blag/static/media/storm_issues_chart.png and /dev/null differ diff --git a/blag/static/media/videos/comment.ogg b/blag/static/media/videos/comment.ogg deleted file mode 100644 index 44c5185..0000000 Binary files a/blag/static/media/videos/comment.ogg and /dev/null differ diff --git a/blag/static/media/videos/cw.ogg b/blag/static/media/videos/cw.ogg deleted file mode 100644 index 836536d..0000000 Binary files a/blag/static/media/videos/cw.ogg and /dev/null differ diff --git a/blag/static/media/videos/indent.ogg b/blag/static/media/videos/indent.ogg deleted file mode 100644 index d048e7c..0000000 Binary files a/blag/static/media/videos/indent.ogg and /dev/null differ diff --git a/blag/static/media/videos/macros.ogg b/blag/static/media/videos/macros.ogg deleted file mode 100644 index adda898..0000000 Binary files a/blag/static/media/videos/macros.ogg and /dev/null differ diff --git a/blag/static/robots.txt b/blag/static/robots.txt deleted file mode 100644 index f6a85d6..0000000 --- a/blag/static/robots.txt +++ /dev/null @@ -1,11 +0,0 @@ -User-agent: DuckDuckGo -Disallow: /media/ - -User-agent: Googlebot -Disallow: /media/ - -User-agent: archive.org_bot -Disallow: /media/ - -User-agent: * -Disallow: / -- cgit v1.2.1